Beispiel #1
0
def degreeStats(g):
    """Print degree statistics for ``g`` and plot its degree distribution.

    One degree kind is selected up front and used for every step: the
    total degree for undirected graphs and the in-degree for directed
    ones.  Previously the raw degree array handed to ``stats`` was always
    the out-degree array, so for directed graphs the printed statistics
    described a different quantity than the ``vertex_average`` standard
    deviation and the plotted histogram.

    Args:
        g: Graph object providing ``is_directed``, ``get_vertices``,
            ``get_in_degrees`` and ``get_out_degrees``.

    Side effects:
        Prints to stdout and calls ``histogram`` with an output name
        derived from ``sys.argv[1]``.
    """
    directed = g.is_directed()
    deg_kind = "in" if directed else "total"

    # Only the standard deviation returned by vertex_average is reported.
    _, std = vertex_average(g, deg_kind)

    vertices = g.get_vertices()
    if directed:
        degrees = g.get_in_degrees(vertices)
    else:
        # Unchanged from the original call: graph-tool reports the full
        # degree of an undirected vertex as its out-degree.
        degrees = g.get_out_degrees(vertices)

    print("Graus")
    stats(degrees)
    print("\tDesvio padrão (graphtools): ", std)

    distribution = vertex_hist(g, deg_kind)
    # Both axis labels now name the degree kind that is actually plotted.
    # NOTE(review): the last 8 characters of argv[1] are dropped to build
    # the output name — presumably a fixed-length suffix; confirm.
    histogram(distribution, "Distribuição de graus",
              "$k_{" + deg_kind + "}$",
              "$NP(k_{" + deg_kind + "})$",
              sys.argv[1][:-8] + ".graus")
Beispiel #2
0
    def degree_distribution(self, g, name):
        """Plot the total-degree histogram of ``g`` and return degree statistics.

        Args:
            g: Graph passed to ``gt.vertex_hist`` and ``gt.vertex_average``.
            name: Label inserted into the output file name.

        Returns:
            tuple: ``(atot, stdtot, stdm)`` where ``atot`` and ``stdm`` are
            the two values returned by ``gt.vertex_average(g, "total")`` and
            ``stdtot`` is ``stdm`` multiplied by the square root of the
            number of vertices.
        """
        histogram = gt.vertex_hist(g, "total", float_count=False)
        pdf_name = self.name + ' ' + name + ' ' + "totaldegdist.pdf"
        self.__plot_degree(histogram, pdf_name, "total node degree")

        # The in- and out-degree variants are currently disabled:
        # in_hist = gt.vertex_hist(g, "in", float_count=False)
        # self.__plot_degree(in_hist, self.name + ' ' + name + ' ' + "indegdistloglog.pdf", "in node degree")
        #
        # out_hist = gt.vertex_hist(g, "out", float_count=False)
        # self.__plot_degree(out_hist, self.name + ' ' + name + ' ' + "outdegdistloglog.pdf", "out node degree")

        mean_degree, mean_error = gt.vertex_average(g, "total")
        scaled_error = mean_error * np.sqrt(g.num_vertices())
        return mean_degree, scaled_error, mean_error
Beispiel #3
0
Wuhan, China 
'''

import graph_tool.all as gt
from math import *
from pylab import *

# Alternative datasets from graph-tool's collection (disabled):
#g = gt.collection.data['cond-mat-2005']
#   Updated network of coauthorships between scientists posting preprints
#   on the Condensed Matter E-Print Archive.
#g = gt.collection.data['email-Enron']
#   Enron email communication network covering all the email communication
#   within a dataset of around half a million emails.
g = gt.collection.data['pgp-strong-2009']
# Strongly connected component of the PGP web of trust circa November 2009.
# The full data is available at http://key-server.de/dump/.

# Histogram of the *total* degree; the variable keeps its historical
# ``in_hist`` name even though "total" is what is requested.
in_hist = gt.vertex_hist(g, "total")
bin_edges = in_hist[1]

y = in_hist[0]
err = sqrt(y)
# Wherever the error would reach or exceed the count, shrink it to just
# below the count — presumably so the lower bar end stays positive on the
# log-scaled axis.
clamp = err >= y
err[clamp] = y[clamp] - 1e-3

figure(figsize=(6, 4))
errorbar(bin_edges[:-1], y, yerr=err, fmt="o", label="all")

ax = gca()
ax.set_yscale("log")
ax.set_xscale("log")
ax.set_ylim(1e-1, 1.1 * 1e4)
ax.set_xlim(1, 1e4)

subplots_adjust(left=0.2, bottom=0.2)
xlabel("$k_{all}$")
ylabel("$N*Pr(k_{all})$")
tight_layout()
Beispiel #4
0
def compute_node_degree_hist(g, direction):
    """Return the vertex degree histogram of ``g`` for ``direction``.

    Thin wrapper around ``gtall.vertex_hist`` that always requests
    ``float_count=False``.

    Args:
        g: Graph forwarded to ``gtall.vertex_hist``.
        direction: Degree selector forwarded unchanged as the second argument.

    Returns:
        Whatever ``gtall.vertex_hist`` returns for these arguments.
    """
    degree_histogram = gtall.vertex_hist(g, direction, float_count=False)
    return degree_histogram
Beispiel #5
0
'''

import graph_tool.all as gt
from math import *
from pylab import *

# Other datasets that can be swapped in (disabled):
#g = gt.collection.data['cond-mat-2005']
#   Updated network of coauthorships between scientists posting preprints
#   on the Condensed Matter E-Print Archive.
#g = gt.collection.data['email-Enron']
#   Enron email communication network covering all the email communication
#   within a dataset of around half a million emails.
g = gt.collection.data['pgp-strong-2009']
# Strongly connected component of the PGP web of trust circa November 2009.
# The full data is available at http://key-server.de/dump/.


# Total-degree histogram (the ``in_hist`` name is historical; the degree
# requested is "total", not "in").
in_hist = gt.vertex_hist(g, "total")

y = in_hist[0]
err = sqrt(y)
# Cap each error just below its count where it would otherwise be at least
# as large — presumably to keep error bars drawable on the log axis.
oversized = err >= y
err[oversized] = y[oversized] - 1e-3

figure(figsize=(6, 4))
errorbar(in_hist[1][:-1], y, yerr=err, fmt="o", label="all")

axes = gca()
for set_scale in (axes.set_yscale, axes.set_xscale):
    set_scale("log")
axes.set_ylim(1e-1, 1.1 * 1e4)
axes.set_xlim(1, 1e4)

subplots_adjust(left=0.2, bottom=0.2)
xlabel("$k_{all}$")
ylabel("$N*Pr(k_{all})$")
Beispiel #6
0
def statistics(G):
    """Provide general graph statistics.

    Args:
        G (graph_tool.Graph): The graph instance.

    Returns:
        The string ``'No Graph Loaded'`` when ``G`` is falsy.  Otherwise a
        dict with the keys ``num_vertices``, ``num_edges``, ``num_cc``,
        ``num_singletons``, ``vlogv`` (formatted with two decimals),
        ``deg_bins``/``deg_counts`` (non-empty out-degree histogram bins),
        ``cc_sizes``/``cc_counts`` (sorted values of the component histogram
        and how often each occurs) and ``peel_bins``/``peel_counts``
        (k-core values in ascending order and the number of vertices at
        each value).
    """
    if not G:
        return 'No Graph Loaded'

    # Queried once; the graph is not modified inside this function.
    num_vertices = G.num_vertices()

    deg_counts, deg_bins = gt.vertex_hist(G, 'out', float_count=False)
    # Keep only the bins that actually contain vertices.
    incl_idx = np.where(deg_counts != 0)[0]
    deg_bins = list(deg_bins[incl_idx])
    deg_counts = list(deg_counts[incl_idx])

    comp, cc_hist = gt.label_components(G)
    cc_size_counts = sorted(Counter(cc_hist).items())
    cc_sizes = [size for size, _ in cc_size_counts]
    cc_counts = [count for _, count in cc_size_counts]

    num_cc = len(np.unique(comp.a))

    # Degree-0 vertices are singletons.  The emptiness guard avoids an
    # IndexError when the histogram has no populated bin.
    if deg_bins and deg_bins[0] == 0:
        num_singletons = deg_counts[0]
    else:
        num_singletons = 0

    # Explicit None checks: a filter property map must count as "filter
    # present" regardless of how the map object evaluates in a boolean
    # context.
    has_filter = (G.get_vertex_filter()[0] is not None
                  or G.get_edge_filter()[0] is not None)

    if has_filter:
        # Always correct, but much slower
        peel_partition = kcore_decomposition(G)
        peel_bins = sorted(peel_partition.keys())
        peel_counts = [len(peel_partition[k]) for k in peel_bins]
    else:
        # NOTE:
        # Very fast, but unstable (not always accurate) for graphs with
        # filters, hence only used when no filter is set.  Every vertex
        # index is therefore included.
        kcore = gt.kcore_decomposition(G)
        v_idx = np.arange(num_vertices)
        # Sorted so both paths return bins in ascending order; building the
        # two lists separately also copes with an empty counter.
        core_counts = sorted(Counter(kcore.a[v_idx]).items())
        peel_bins = [core for core, _ in core_counts]
        peel_counts = [count for _, count in core_counts]

    vlogv = num_vertices * np.log2(num_vertices)

    return {
        'num_vertices': num_vertices,
        'num_edges': G.num_edges(),
        'num_cc': num_cc,
        'num_singletons': num_singletons,
        'vlogv': '{:.2f}'.format(vlogv),
        'deg_bins': deg_bins,
        'deg_counts': deg_counts,
        'cc_sizes': cc_sizes,
        'cc_counts': cc_counts,
        'peel_bins': peel_bins,
        'peel_counts': peel_counts,
    }
    print("Maximum of all Degrees: ", max(degree_list_LC))  #   2025
    print("Minimum of all Degrees: ", min(degree_list_LC))  #      1
    print("Length of Degree List / Number of Nodes: ",
          len(degree_list_LC))  #  16382
    print("Avg Degree: ",
          sum(degree_list_LC) / len(degree_list_LC))  #     11.444146013917715
    print("Median Degree: ", np.median(degree_list_LC))  #      2.0
    print("Mode Degree: ", stats.mode(degree_list_LC)[0][0])  #      1

    print("\n\nDescriptives: Average Degree - done\n")

#-- Degree Distribution of Friendship Network --#
# Executed only when the ``descDDist_bool`` switch is set.
if descDDist_bool == True:
    print("\n\nDesciptives: Degree Distribution - Friendship Network")

    # Out-degree histogram of the friendship graph ``g_friend``.
    degree_hist = gt.vertex_hist(g_friend, "out")

    #print("Degree Distribution Frequency: \n", degree_hist[0])
    #print("Degree Distribution Values: \n", degree_hist[1])
    #print(len(degree_hist[0]), len(degree_hist[1]))

    y = degree_hist[0]
    x = degree_hist[
        1][:
           -1]  # second array of the histogram with its last entry dropped
    # gt.vertex_hist returns two arrays: the frequencies (degree_hist[0]) and
    # the degree values (degree_hist[1]). The second array has one entry more
    # than the first, so its last value is dropped here to make x and y the
    # same length.
    # NOTE(review): the extra entry is presumably because degree_hist[1] holds
    # bin edges (n bins have n + 1 edges) — confirm against the graph-tool
    # vertex_hist documentation.