Ejemplo n.º 1
0
    def compute(self, stats, name, pr=True):
        """Add a 'Page Rank' column to ``stats`` and plot its distribution.

        PageRank scores come from ``nx.pagerank`` (alpha=0.85, edges weighted
        by ``self.edge_attribute_for_weight``) and are cached in
        ``pickle/<name>pagerank_centrality.pickle``. When that file already
        exists it is loaded instead of recomputing.

        Args:
            stats: table that receives the 'Page Rank' column. Assumed to be
                a pandas DataFrame with one row per node, in the graph's node
                iteration order -- TODO confirm against the caller.
            name: prefix used for the cache file and the plot file name.
            pr: when true, print the 20 highest-ranked rows and show the plot.

        Returns:
            ``stats`` with the 'Page Rank' column set.
        """
        # NOTE(review): the cache is a pickle; only files written by this
        # method should ever be placed at this path.
        cache_path = 'pickle/' + name + 'pagerank_centrality.pickle'
        if os.path.exists(cache_path):
            with open(cache_path, 'rb') as cache_file:
                page_rank = pickle.load(cache_file)
        else:
            page_rank = nx.pagerank(self.graph, alpha=0.85,
                                    weight=self.edge_attribute_for_weight)
            with open(cache_path, 'wb') as cache_file:
                pickle.dump(page_rank, cache_file, pickle.HIGHEST_PROTOCOL)

        stats['Page Rank'] = list(page_rank.values())

        # Top 20 nodes with the highest page rank. Only printed when output
        # was requested, like the other reports driven by ``pr``.
        if pr:
            print(stats.sort_values(by='Page Rank', ascending=False).head(20))

        # Empirical distribution: share of nodes for each distinct score.
        distribution = stats.groupby(['Page Rank']).size().reset_index(name='Frequency')
        total = distribution['Frequency'].sum()
        distribution['Probability'] = distribution['Frequency'] / total

        plots.create_plot("plots/" + name + "_pageRank_distribution.pdf", "Page Rank distribution",
                          'Page Rank value', distribution['Page Rank'],
                          "Probability", distribution['Probability'],
                          xticks=[0, 0.01, 0.02, 0.03, 0.04, 0.042], yticks=[0, 0.001],
                          discrete=False)  # FIXME boundaries
        if pr:
            plt.show()

        return stats
Ejemplo n.º 2
0
    def compute(self, stats, name, pr=True):
        """Add a 'Clustering' column to ``stats`` and report on it.

        Clustering coefficients come from ``nx.clustering`` (weighted by
        ``self.edge_attribute_for_weight``) and are cached in
        ``pickle/<name>_clustering_coefficient.pickle``. An existing cache
        file is loaded instead of recomputing.

        Args:
            stats: table that receives the 'Clustering' column. Assumed to be
                a pandas DataFrame with one row per node -- TODO confirm.
            name: prefix used for the cache file and the plot file name.
            pr: when true, print the node reports and show the plot.

        Returns:
            A ``(stats, average_clustering)`` tuple.
        """
        cache_path = 'pickle/' + name + '_clustering_coefficient.pickle'
        if os.path.exists(cache_path):
            with open(cache_path, 'rb') as cache_file:
                clustering = pickle.load(cache_file)
        else:
            clustering = nx.clustering(self.graph, weight=self.edge_attribute_for_weight)
            with open(cache_path, 'wb') as cache_file:
                pickle.dump(clustering, cache_file, pickle.HIGHEST_PROTOCOL)

        stats['Clustering'] = list(clustering.values())

        # Node groups of interest: fully clustered, roughly half clustered
        # (0.48 < C < 0.52) and not clustered at all.
        reports = (
            ("Nodes with clustering 1.0:", lambda coef: coef == 1.0),
            ("Nodes with clustering 0.5:", lambda coef: 0.48 < coef < 0.52),
            ("Nodes with clustering 0.0:", lambda coef: coef == 0.0),
        )
        for label, matches in reports:
            selected = [node for node, coef in clustering.items() if matches(coef)]
            if pr:
                print(label, len(selected))
                print(selected)

        # Empirical distribution of the clustering coefficient.
        grouped = stats.groupby(['Clustering']).size()
        distribution = grouped.reset_index(name='Frequency')
        total = distribution['Frequency'].sum()
        distribution['Probability'] = distribution['Frequency'] / total

        plots.create_plot("plots/" + name + "_clustering_distribution.pdf", "Clustering coefficient distribution",
                          "Clustering coefficient", distribution['Clustering'],
                          "Probability", distribution['Probability'])
        if pr:
            plt.show()

        # Average clustering coefficient, <C>.
        average_clustering = statistics.mean(list(clustering.values()))
        if pr:
            print("Average Clustering Coefficient, <C> =", average_clustering)

        return stats, average_clustering
Ejemplo n.º 3
0
    def compute(self, stats, name, pr=True):
        """Add an 'Eigenvector' column to ``stats`` and plot its distribution.

        Scores come from ``nx.eigenvector_centrality`` (weighted by
        ``self.edge_attribute_for_weight``) and are cached in
        ``pickle/<name>eigenvector_centrality.pickle``. An existing cache
        file is loaded instead of recomputing.

        Args:
            stats: table that receives the 'Eigenvector' column. Assumed to
                be a pandas DataFrame with one row per node, in the graph's
                node iteration order -- TODO confirm against the caller.
            name: prefix used for the cache file and the plot file name.
            pr: when true, print the 20 highest-scoring rows and show the
                plot.

        Returns:
            ``stats`` with the 'Eigenvector' column set.
        """
        # NOTE(review): the cache is a pickle; only files written by this
        # method should ever be placed at this path.
        cache_path = 'pickle/' + name + 'eigenvector_centrality.pickle'
        if os.path.exists(cache_path):
            with open(cache_path, 'rb') as cache_file:
                eigenvector_centrality = pickle.load(cache_file)
        else:
            eigenvector_centrality = nx.eigenvector_centrality(
                self.graph, weight=self.edge_attribute_for_weight)
            with open(cache_path, 'wb') as cache_file:
                pickle.dump(eigenvector_centrality, cache_file,
                            pickle.HIGHEST_PROTOCOL)

        stats['Eigenvector'] = list(eigenvector_centrality.values())

        # Top 20 nodes with the highest eigenvector rating. Only printed when
        # output was requested, like the other reports driven by ``pr``.
        if pr:
            print(
                stats.sort_values(by='Eigenvector', ascending=False).head(20))

        # Empirical distribution: share of nodes for each distinct score.
        distribution = stats.groupby(
            ['Eigenvector']).size().reset_index(name='Frequency')
        total = distribution['Frequency'].sum()
        distribution['Probability'] = distribution['Frequency'] / total

        # TODO: a histogram would work better here, to get a sense of the
        # "upper class".
        plots.create_plot("plots/" + name + "_eigenvector_distribution.pdf",
                          "Eigenvector centrality distribution",
                          'Eigenvector',
                          distribution['Eigenvector'],
                          "Probability",
                          distribution['Probability'],
                          xticks=[0, 0.01, 0.02, 0.03, 0.04, 0.042],
                          yticks=[0, 0.001],
                          discrete=False)
        if pr:
            plt.show()

        return stats
Ejemplo n.º 4
0
    def compute(self, stats, name, pr=True):
        """Add weighted-degree columns to ``stats`` and analyse each one.

        'Weighted Degree' is always computed; for directed graphs
        (``self.directed``) 'Weighted In-Degree' and 'Weighted Out-Degree'
        are added too. Each list of degrees is cached in its own pickle file
        under ``pickle/`` and reloaded from there when the file exists.

        For every column the method plots the probability distribution and
        records the value returned by ``plots.create_plot`` (reported as the
        distribution's gamma) together with the column mean.

        Args:
            stats: table that receives the columns. Assumed to be a pandas
                DataFrame with one row per node, in the graph's node
                iteration order -- TODO confirm against the caller.
            name: prefix used for the cache files and the plot file names.
            pr: when true, print the reports and show the plots.

        Returns:
            A ``(stats, alphas, averages)`` tuple; ``alphas`` and
            ``averages`` are dicts keyed by column name.
        """

        def cached_degrees(file_suffix, view_name):
            # Load the degree list from its pickle cache, or compute it from
            # the named degree view of the graph and write the cache.
            # NOTE(review): pickle files are only safe to load when they were
            # written by this method.
            path = 'pickle/' + name + file_suffix
            if os.path.exists(path):
                with open(path, 'rb') as cache_file:
                    return pickle.load(cache_file)
            degree_view = getattr(self.graph, view_name)
            degrees = [
                value for _, value in degree_view(
                    weight=self.edge_attribute_for_weight)
            ]
            with open(path, 'wb') as cache_file:
                pickle.dump(degrees, cache_file, pickle.HIGHEST_PROTOCOL)
            return degrees

        stats['Weighted Degree'] = cached_degrees(
            'weighted_degree_centrality.pickle', 'degree')
        functions_todo = ['Weighted Degree']

        if self.directed:
            weighted_in_degree_list = cached_degrees(
                'weighted_in_degree_centrality.pickle', 'in_degree')
            weighted_out_degree_list = cached_degrees(
                'weighted_out_degree_centrality.pickle', 'out_degree')

            stats['Weighted In-Degree'] = weighted_in_degree_list
            stats['Weighted Out-Degree'] = weighted_out_degree_list

            functions_todo = [
                'Weighted Degree', 'Weighted In-Degree', 'Weighted Out-Degree'
            ]

        averages = {}
        alphas = {}
        for function in functions_todo:
            # Top 10 nodes with the highest weighted degree.
            if pr:
                print(stats.sort_values(by=function, ascending=False).head(10))

            # Empirical distribution: share of nodes per distinct value.
            distribution = stats.groupby(
                [function]).size().reset_index(name='Frequency')
            total = distribution['Frequency'].sum()
            distribution['Probability'] = distribution['Frequency'] / total

            alpha = plots.create_plot(
                "plots/" + name + "_weighted_" + function +
                "_distribution.pdf",
                "",
                function,
                distribution[function],
                "Probability",
                distribution['Probability'],
                yticks=[0, 0.001, 0.002, 0.003, 0.005, 0.01],
                also_log_scale=True,
                log_yticks=[1e-5, 1e-4, 1e-3, 1e-2, 1e-1],
                powerlaw_xmin=1e1,
                powerlaw_xmax=1e4)

            if pr:
                plt.show()

            alphas[function] = alpha

            if pr:
                print(function + ' distribution gamma= ', alpha)

            # Average degree: <k>, <k_in>, <k_out>.
            average = statistics.mean(stats[function])
            averages[function] = average
            if pr:
                print("Average", function, "=", average)

        return stats, alphas, averages
Ejemplo n.º 5
0
    def compute(self, stats, name, pr=True):
        """Add degree columns to ``stats`` and analyse each one.

        'Degree' is always computed; for directed graphs (``self.directed``)
        'In-Degree' and 'Out-Degree' are added too. The networkx degree
        centralities are cached in pickle files under ``pickle/`` and
        reloaded from there when the file exists. Each column is then
        multiplied by ``self.n_nodes - 1`` and rounded to undo the
        normalisation.

        For every column the method plots the probability distribution and
        records the value returned by ``plots.create_plot`` (reported as the
        distribution's gamma) together with the column mean.

        Args:
            stats: table that receives the columns. Assumed to be a pandas
                DataFrame with one row per node, in the graph's node
                iteration order -- TODO confirm against the caller.
            name: prefix used for the cache files and the plot file names.
            pr: when true, print the reports and show the plots.

        Returns:
            A ``(stats, alphas, averages)`` tuple; ``alphas`` and
            ``averages`` are dicts keyed by column name.
        """

        def cached_centrality(file_suffix, centrality_function):
            # Load the centrality dict from its pickle cache, or compute it
            # for the graph and write the cache.
            # NOTE(review): pickle files are only safe to load when they were
            # written by this method.
            path = 'pickle/' + name + file_suffix
            if os.path.exists(path):
                with open(path, 'rb') as cache_file:
                    return pickle.load(cache_file)
            centrality = centrality_function(self.graph)
            with open(path, 'wb') as cache_file:
                pickle.dump(centrality, cache_file, pickle.HIGHEST_PROTOCOL)
            return centrality

        degree_centrality = cached_centrality('degree_centrality.pickle',
                                              nx.degree_centrality)
        stats['Degree'] = list(degree_centrality.values())

        functions_todo = ['Degree']

        if self.directed:
            in_degree_centrality = cached_centrality(
                'in_degree_centrality.pickle', nx.in_degree_centrality)
            out_degree_centrality = cached_centrality(
                'out_degree_centrality.pickle', nx.out_degree_centrality)

            stats['In-Degree'] = list(in_degree_centrality.values())
            stats['Out-Degree'] = list(out_degree_centrality.values())

            functions_todo = ['Degree', 'In-Degree', 'Out-Degree']

        averages = {}
        alphas = {}
        for function in functions_todo:
            # Un-normalize: turn the centrality back into a degree count.
            stats[function] = round(stats[function] * (self.n_nodes - 1))
            # Top 10 nodes with the highest degree.
            if pr:
                print(stats.sort_values(by=function, ascending=False).head(10))

            # Empirical distribution: share of nodes per distinct value.
            distribution = stats.groupby(
                [function]).size().reset_index(name='Frequency')
            total = distribution['Frequency'].sum()
            distribution['Probability'] = distribution['Frequency'] / total

            # One file per column, so the in/out-degree plots of a directed
            # graph do not overwrite the degree plot. The lower-cased column
            # name keeps the historical "_degree_distribution.pdf" path for
            # the 'Degree' column.
            alpha = plots.create_plot(
                "plots/" + name + "_" + function.lower() +
                "_distribution.pdf",
                function + " distribution",
                function,
                distribution[function],
                "Probability",
                distribution['Probability'],
                yticks=[0, 0.001, 0.002, 0.003, 0.007],
                also_log_scale=True,
                log_yticks=[1e-5, 1e-4, 1e-3, 1e-2, 1e-1],
                powerlaw_xmin=1e1,
                powerlaw_xmax=1e4)

            if pr:
                plt.show()

            alphas[function] = alpha

            if pr:
                print(function + ' distribution gamma= ', alpha)

            # Average degree: <k>, <k_in>, <k_out>.
            average = statistics.mean(stats[function])
            averages[function] = average
            if pr:
                print("Average", function, "=", average)

        return stats, alphas, averages