def community_evaluation_metrics(cdlib_coms):
    """Build a DataFrame of community-wise fitness scores.

    Every cdlib evaluation metric is invoked with ``summary=False`` so a
    score is produced per community instead of a partition-level summary.
    See https://cdlib.readthedocs.io/en/latest/reference/evaluation.html

    :param cdlib_coms: cdlib clustering object carrying its graph
    :return: pandas DataFrame, one row per community; ``community_id`` is 1-based
    """
    uG = cdlib_coms.graph
    # Column name -> cdlib evaluation callable; insertion order fixes the
    # column order of the resulting DataFrame.
    metric_fns = {
        'embeddedness': evaluation.avg_embeddedness,
        'average_internal_degree': evaluation.average_internal_degree,
        'conductance': evaluation.conductance,
        'transitivity': evaluation.avg_transitivity,
        'cut_ratio': evaluation.cut_ratio,
        'expansion': evaluation.expansion,
        'edges_inside': evaluation.edges_inside,
        'fraction_over_median_degree': evaluation.fraction_over_median_degree,
        'hub_dominance': evaluation.hub_dominance,
        'internal_edge_density': evaluation.internal_edge_density,
        'max_odf': evaluation.max_odf,
        'avg_odf': evaluation.avg_odf,
        'flake_odf': evaluation.flake_odf,
        'size': evaluation.size,
        'triangle_participation_ratio': evaluation.triangle_participation_ratio,
    }
    eval_dict = {
        name: fn(uG, cdlib_coms, summary=False)
        for name, fn in metric_fns.items()
    }
    com_eval_df = (
        pd.DataFrame(eval_dict)
        .reset_index()
        .rename({'index': 'community_id'}, axis=1)
    )
    # Shift from pandas' 0-based index to 1-based community ids.
    com_eval_df['community_id'] = com_eval_df['community_id'] + 1
    return com_eval_df
def internal_edge_density(self, **kwargs):
    """The internal density of the algorithms set.

    .. math:: f(S) = \\frac{m_S}{n_S(n_S−1)/2}

    where :math:`m_S` is the number of algorithms internal edges and
    :math:`n_S` is the number of algorithms nodes.

    :param summary: (optional, default True) if **True**, an overall summary
        is returned for the partition (min, max, avg, std); if **False** a
        list of community-wise score
    :return: a FitnessResult object/a list of community-wise score

    :Example:

    >>> from cdlib.algorithms import louvain
    >>> g = nx.karate_club_graph()
    >>> communities = louvain(g)
    >>> mod = communities.internal_edge_density()
    """
    # Guard clause: a graph must be attached before any fitness score
    # can be computed.
    if not self.__check_graph():
        raise ValueError("Graph instance not specified")
    return evaluation.internal_edge_density(self.graph, self, **kwargs)
def getAllScoresDict(g, _reference, _communities, executionTime):
    """Compute comparison and fitness scores for a detected clustering.

    :param g: networkx graph the communities were detected on
    :param _reference: ground-truth clustering; deep-copied so the caller's
        object is unaffected by the partition-completion step
    :param _communities: detected clustering; deep-copied as well
    :param executionTime: wall-clock time of the detection run, stored
        under the ``'time'`` key
    :return: dict mapping score name -> value (``np.nan`` when a metric fails)
    """

    def _try_score(compute):
        # Run one metric, mapping any metric failure to NaN so a single
        # broken score does not abort the whole evaluation.
        # Fix: narrowed from the original bare ``except:`` so that
        # KeyboardInterrupt/SystemExit still propagate.
        try:
            return compute()
        except Exception:
            return np.nan

    scores = {}
    scores['time'] = executionTime

    # Work on deep copies: both partitions are completed (uncovered nodes
    # assigned to fresh singleton clusters) before scoring.
    reference = copy.deepcopy(_reference)
    reference.communities = complete_partition(reference.communities, g, mode='new_cluster')
    communities = copy.deepcopy(_communities)
    communities.communities = complete_partition(communities.communities, g, mode='new_cluster')

    # --- partition-vs-reference similarity scores ---
    # Disabled in the original run:
    # scores['adjusted_mutual_information'] = evaluation.adjusted_mutual_information(reference, communities).score
    # scores['adjusted_rand_index'] = evaluation.adjusted_rand_index(reference, communities).score

    # Average F1 score of the optimal community matches among the partitions.
    scores['f1'] = _try_score(lambda: evaluation.f1(reference, communities).score)
    # Normalized F1 score of the optimal community matches.
    scores['nf1'] = _try_score(lambda: evaluation.nf1(reference, communities).score)
    # scores['normalized_mutual_information'] = evaluation.normalized_mutual_information(reference, communities)[0]
    # Index of resemblance for overlapping, complete-coverage clusterings.
    scores['omega'] = _try_score(lambda: evaluation.omega(reference, communities).score)
    # Overlapping NMI (LFK variant); this call is indexed with [0] rather
    # than ``.score`` as in the original.
    scores['overlapping_normalized_mutual_information_LFK'] = _try_score(
        lambda: evaluation.overlapping_normalized_mutual_information_LFK(reference, communities)[0])
    # scores['overlapping_normalized_mutual_information_MGH'] = evaluation.overlapping_normalized_mutual_information_MGH(reference, communities)[0]
    # scores['variation_of_information'] = evaluation.variation_of_information(reference, communities)[0]
    # scores['avg_distance'] = evaluation.avg_distance(g, communities, summary=True)

    # --- fitness scores of the detected communities on g ---
    scores['avg_embeddedness'] = _try_score(
        lambda: evaluation.avg_embeddedness(g, communities, summary=True).score)
    scores['average_internal_degree'] = _try_score(
        lambda: evaluation.average_internal_degree(g, communities, summary=True).score)
    # scores['avg_transitivity'] = evaluation.avg_transitivity(g, communities, summary=True)
    # Fraction of total edge volume that points outside the community.
    scores['conductance'] = _try_score(
        lambda: evaluation.conductance(g, communities, summary=True).score)
    # Fraction of existing edges (out of all possible edges) leaving the community.
    scores['cut_ratio'] = _try_score(
        lambda: evaluation.cut_ratio(g, communities, summary=True).score)
    # Number of edges internal to the community.
    scores['edges_inside'] = _try_score(
        lambda: evaluation.edges_inside(g, communities, summary=True).score)
    # Number of edges per community node that point outside the cluster.
    scores['expansion'] = _try_score(
        lambda: evaluation.expansion(g, communities, summary=True).score)
    # Fraction of community nodes having internal degree higher than the median.
    scores['fraction_over_median_degree'] = _try_score(
        lambda: evaluation.fraction_over_median_degree(g, communities, summary=True).score)
    # scores['hub_dominance'] = evaluation.hub_dominance(g, communities, summary=True)
    # The internal density of the community set.
    scores['internal_edge_density'] = _try_score(
        lambda: evaluation.internal_edge_density(g, communities, summary=True).score)
    # Normalized variant of the cut ratio.
    scores['normalized_cut'] = _try_score(
        lambda: evaluation.normalized_cut(g, communities, summary=True).score)
    # scores['max_odf'] = evaluation.max_odf(g, communities, summary=True)
    # scores['avg_odf'] = evaluation.avg_odf(g, communities, summary=True)
    # scores['flake_odf'] = evaluation.flake_odf(g, communities, summary=True)
    # Ratio of the community density w.r.t. the complete graph density.
    scores['scaled_density'] = _try_score(
        lambda: evaluation.scaled_density(g, communities, summary=True).score)
    # How unlikely a partition of dense communities is in a random graph.
    scores['significance'] = _try_score(
        lambda: evaluation.significance(g, communities).score)
    # Number of nodes in the community.
    scores['size'] = _try_score(
        lambda: evaluation.size(g, communities, summary=True).score)
    # Surprise: the higher the score, the less likely the partition results
    # from a random (hyper-geometric) realization.
    scores['surprise'] = _try_score(
        lambda: evaluation.surprise(g, communities).score)
    scores['modularity_density'] = _try_score(
        lambda: evaluation.modularity_density(g, communities).score)
    # scores['triangle_participation_ratio'] = evaluation.triangle_participation_ratio(g, communities, summary=True)
    # scores['purity'] = evaluation.purity(communities)

    return scores
# Append the current result row and persist the detection results so far.
# NOTE(review): forDf / graph / method / score are defined earlier in the
# file (outside this chunk); these appends presumably run once per evaluated
# (graph, method) pair — confirm against the enclosing loop.
forDf['graph'].append(graph)
forDf['method'].append(method)
forDf['score'].append(score)
df = pd.DataFrame(forDf)
df.to_csv('cdlib_results_legacy_part.csv')

# Long-format accumulator for per-graph statistics:
# one row per (graph, statistic) pair.
graphStatistics = {'graph': [], 'statistic': [], 'value': []}

# Statistic label (LaTeX, written verbatim into the output CSV) -> callable
# computing it from (graph, reference clustering).
statistics = {
    '$|V|$': lambda graph, reference: len(graph.nodes),  # node count
    '$|E|$': lambda graph, reference: len(graph.edges),  # edge count
    '$\\hat{k}$': lambda graph, reference: len(reference.communities),  # number of reference communities
    '$\\hat{o}$': lambda graph, reference: getOverlappingNumber(reference.communities),  # helper defined elsewhere in the file
    '$\\hat{c}$': lambda graph, reference: nx.average_clustering(graph),  # average clustering coefficient
    '\\shortstack[l]{normalized \\\\ cut}': lambda graph, reference: evaluation.normalized_cut(graph, reference, summary=True).score,
    '\\shortstack[l]{internal \\\\ edge \\\\density}': lambda graph, reference: evaluation.internal_edge_density(graph, reference, summary=True).score,
    'significance': lambda graph, reference: evaluation.significance(graph, reference, summary=True).score,
    # NOTE(review): 'avarage' typo left as-is — this label is data that flows
    # into the CSV, so correcting it would change the program's output.
    '\\shortstack[l]{avarage \\\\ internal \\\\degree}': lambda graph, reference: evaluation.average_internal_degree(graph, reference, summary=True).score,
    '\\shortstack[l]{modularity \\\\ density}': lambda graph, reference: evaluation.modularity_density(graph, reference).score,
}

# Evaluate every statistic on every (graph, reference) pair and persist.
for graph, name, reference in zip(graphs, names, references):
    for statistic_name, statistic in statistics.items():
        graphStatistics['graph'].append(name)
        graphStatistics['statistic'].append(statistic_name)
        graphStatistics['value'].append(statistic(graph, reference))
graphStatisticsDF = pd.DataFrame(graphStatistics)
graphStatisticsDF.to_csv('../Results/graphs_stats_legacy.csv')
# method that were not used, or don't work by some reason