Esempio n. 1
0
def networkSummary(G):
    r"""Provides summary values about the network

    Args:
        G (graph)
            The network of strains from :func:`~constructNetwork`

    Returns:
        components (int)
            The number of connected components (and clusters)
        density (float)
            The proportion of possible edges used; ``0.0`` when the network
            has fewer than two vertices (no edge is possible then)
        transitivity (float)
            Network transitivity (triads/triangles)
        score (float)
            A score of network fit, given by :math:`\mathrm{transitivity} * (1-\mathrm{density})`
    """
    # label_components returns a per-vertex label map and the component
    # sizes; only the number of components is needed here.
    _, component_frequencies = gt.label_components(G)
    components = len(component_frequencies)

    # Ask the graph for its counts instead of materialising every vertex and
    # edge descriptor into a list just to take its length.
    n_vertices = G.num_vertices()
    n_edges = G.num_edges()

    # Number of vertex pairs, i.e. the edge count of a complete simple graph.
    possible_edges = 0.5 * n_vertices * (n_vertices - 1)
    # Guard against division by zero for empty / single-vertex networks.
    density = n_edges / possible_edges if possible_edges > 0 else 0.0

    transitivity = gt.global_clustering(G)[0]
    score = transitivity * (1 - density)

    return (components, density, transitivity, score)
Esempio n. 2
0
def clusteringStats(g):
    """Report clustering statistics for ``g`` and plot their distribution.

    Prints a header, passes the per-vertex local clustering values to
    ``stats``, prints the first element of ``global_clustering(g)``, and
    hands a NumPy histogram of the local values to ``histogram``.

    Args:
        g: Graph passed to ``local_clustering`` and ``global_clustering``.
    """
    # `.a` is the array view of the returned property map (presumably
    # graph-tool's local_clustering -- confirm against the file's imports).
    local_coefficients = local_clustering(g).a

    print("Clusterização")
    stats(local_coefficients)

    global_coefficient = global_clustering(g)[0]
    print("\tGlobal", global_coefficient)

    distribution = np.histogram(local_coefficients)
    # Output name: first command-line argument without its last 8
    # characters, followed by a fixed suffix.
    output_name = sys.argv[1][:-8] + ".clusterizacao"
    histogram(
        distribution,
        "Distribuição de clusterização",
        "$C_i$",
        "$C$",
        output_name,
    )
Esempio n. 3
0
# Read the edge list from the file named by args_main.graph: one edge per
# line, tab-separated, source in the first column and target in the third.
edges = []

with open("{}".format(args_main.graph), 'r') as f:
    for line in f:
        # Split once per line instead of once per extracted column.
        fields = line.split("\t")
        edges.append((fields[0], fields[2].rstrip("\n")))

g.add_edge_list(edges, hashed=True)

# Query the counts directly rather than building throwaway lists of all
# vertex/edge descriptors.
number_of_vertices = g.num_vertices()
number_of_edges = g.num_edges()

print("### STATISTICS ###")
print("\tNumber of vertices: {}".format(number_of_vertices))
print("\tNumber of edges: {}".format(number_of_edges))

# Each call returns a (value, deviation) pair.
clustering_coeff, std_error = gt.global_clustering(g)
avg_degree, avg_degree_std = gt.vertex_average(g, "total")
avg_in_degree, avg_in_degree_std = gt.vertex_average(g, "in")
avg_out_degree, avg_out_degree_std = gt.vertex_average(g, "out")

# Count vertices without incoming edges, without outgoing edges, and
# without either (isolated).
zero_in_deg = 0
zero_out_deg = 0
isolated_entities = 0
for v in g.vertices():
    has_no_in = v.in_degree() == 0
    has_no_out = v.out_degree() == 0
    if has_no_in:
        zero_in_deg += 1
    if has_no_out:
        zero_out_deg += 1
    if has_no_in and has_no_out:
        isolated_entities += 1
Esempio n. 4
0
    def topology(self):
        """Analyse the connected components of the arc graph and export the results.

        Works on a copy of ``self.arcgraph``; components are computed
        ignoring edge direction. Files written into ``self.statistics_path``:

        * ``clustering_coefficient.txt`` -- the first two values returned by
          ``gt.global_clustering``, tab-separated.
        * ``component_hist.txt`` -- 1-based component id and component size.
        * ``compounds_components.txt`` -- 1-based component id, compound id
          and name of every vertex.
        * ``targets_in_main_component.txt`` and ``targets_in_satellites.txt``
          -- target compound ids with the first name of each compound.
        * ``components_with_start_metabolites.txt`` -- labels of the
          components containing a start compound (0-based, unlike the
          1-based ids in the other files).

        Additionally appends one LaTeX table row to ``component_table.txt``
        under ``const.MECAT_BASE``, calls the private component-histogram
        plotting helper, and draws the graph coloured by start-component
        membership to a PDF.
        """
        g = self.arcgraph.copy()
        # directed=False because True would look for strongly connected components
        components, chist = gt.label_components(g, directed=False)
        self.__plot_component_hist(chist, 'componenthist')

        # Labels of every component that contains at least one start compound.
        start_components = set()
        for c in self.start_compounds:
            for v in gt.find_vertex(g, g.vp.compound_ids, c):
                start_components.add(components[v])

        # Total number of vertices over all of those components.
        number_compounds_in_start_components = 0
        for c in start_components:
            number_compounds_in_start_components += chist[c]

        # Compound ids of vertices in components without any start compound.
        satellites = set()

        clustering_coefficient = gt.global_clustering(g)
        with open(join(self.statistics_path, "clustering_coefficient.txt"),
                  'w') as f:
            f.write(
                str(clustering_coefficient[0]) + '\t' +
                str(clustering_coefficient[1]) + '\n')

        with open(join(self.statistics_path, "compounds_components.txt"), 'w') as f, \
                open(join(self.statistics_path, "component_hist.txt"), 'w') as f2:

            for componentid, elem in enumerate(chist):
                # Restrict the graph to this component, then copy it with
                # prune=True so only the selected vertices remain.
                u = gt.GraphView(g, vfilt=components.a == componentid)
                u = gt.Graph(u, prune=True)

                f2.write(str(componentid + 1) + '\t' + str(elem) + '\n')

                for v in u.vertices():
                    f.write(
                        str(componentid + 1) + '\t' + u.vp.compound_ids[v] +
                        '\t' + u.vp.name[v] + '\n')

                    if componentid not in start_components:
                        satellites.add(u.vp.compound_ids[v])

        # "Main component" here means "not in a satellite"; a target that is
        # not in the graph at all also lands in this set.
        # (assumes self.targets is a set -- it is combined with set operators)
        targets_in_main_component = self.targets - satellites
        targets_in_satellites = self.targets & satellites

        with open(join(self.statistics_path, "targets_in_main_component.txt"),
                  'w') as f:
            for c in targets_in_main_component:
                compound = self.builder.compounds[c]
                f.write(c + '\t' + compound.names[0] + '\n')

        with open(join(self.statistics_path, "targets_in_satellites.txt"),
                  'w') as f:
            for c in targets_in_satellites:
                compound = self.builder.compounds[c]
                f.write(c + '\t' + compound.names[0] + '\n')

        with open(
                join(self.statistics_path,
                     "components_with_start_metabolites.txt"), 'w') as f:
            for cid in start_components:
                f.write(str(cid) + '\n')

        # Percentage of all vertices that lie in a start component.
        p = number_compounds_in_start_components / g.num_vertices() * 100

        # One LaTeX table row: name, number of components, largest component
        # size, number of start components, vertices in them, percentage.
        # '\\%' is the explicit spelling of the backslash-percent that the
        # previous invalid escape '\%' produced; the output is unchanged.
        with open(join(const.MECAT_BASE, "component_table.txt"), 'a') as f:
            f.write(self.name + ' & ' + str(len(chist)) + ' & ' +
                    str(np.amax(chist)) + ' & ' + str(len(start_components)) +
                    ' & ' + str(int(number_compounds_in_start_components)) +
                    ' & ' + str(int(round(p, 0))) + '\\%' + '\\\\ \n')

        # Colour vertices red when their component holds a start compound,
        # blue otherwise.
        g.vertex_properties["start_components"] = g.new_vertex_property(
            "string", val='white')

        for v in g.vertices():
            if components[v] in start_components:
                g.vp.start_components[v] = 'red'
            else:
                g.vp.start_components[v] = 'blue'

        # NOTE(review): the output location is a hard-coded absolute path and
        # does not use self.statistics_path -- confirm this is intended.
        gt.graph_draw(g,
                      vertex_fill_color=g.vp.start_components,
                      output=join('/mnt', 'g', 'LisaDaten', 'Paper2',
                                  'figures', 'arcgraph' + self.name + '.pdf'))
Esempio n. 5
0
import graph_tool.all as gt
import matplotlib
import numpy as np  # required by np.mean / np.std below

FILE = '10000vertices.xml.gz'

print('loading ' + FILE + ' graph')
g = gt.load_graph(FILE)

# Query the counts directly instead of listing every vertex/edge descriptor.
N = g.num_vertices()
M = g.num_edges()

# Out-degree of every vertex, in vertex iteration order.
arr = [v.out_degree() for v in g.vertices()]

max_degree = max(arr)
min_degree = min(arr)
avg_degree = np.mean(arr)
std_degree = np.std(arr)

print(str(N) + ' vertices')
print(str(M) + ' edges')
print('Max degree: ' + str(max_degree))
print('Min degree: ' + str(min_degree))
print('Avg degree: ' + str(avg_degree) + ' / S.D.: ' + str(std_degree))
# Density as 2M / (N (N - 1)), i.e. edges over the number of vertex pairs.
print('Density: ' + str((2.0 * M) / (N * (N - 1.0))))
print('Pseudo-diameter: ' + str(gt.pseudo_diameter(g)[0]))
# Prints the whole tuple returned by global_clustering, not just its first
# element.
print('Global clustering: ' + str(gt.global_clustering(g)))

# gt.graph_draw(g, output_size=(5000, 5000), vertex_size=1,
#            vcmap=matplotlib.cm.gist_heat_r, output="view.png")
Esempio n. 6
0
def graph_tool_statistics():
    """Load ``graph.graphml`` and print its global clustering result.

    A progress message is printed before loading and before computing.
    """
    print("loading")
    network = gt.load_graph("graph.graphml")

    print("computing")
    clustering = gt.global_clustering(network)
    print(clustering)
Esempio n. 7
0
print(f"L = {L}")

# --- Diameter ---
d = diameter(g)
print(f"diameter = {d}")

# --- Average degree <k> ---
# Presumably L is the edge count and N the vertex count (both are defined
# outside this fragment); the undirected case doubles L.
if g.is_directed():
    avg_degree = L / N
else:
    avg_degree = 2 * L / N
print(f"<k> = {avg_degree}")

# --- Density ---
p = avg_degree / (N - 1)
print(f"p  = {p}")

# --- Global clustering (first element of the returned tuple) ---
gc = GT.global_clustering(g)[0]
print(f"gc = {gc}")

# --- Average shortest path ---
avg_sp = avg_shortest_path(g)
print(f"avg shortest path = {avg_sp}")

# --- Giant component size ---
# Only the printed label depends on whether the graph is directed.
gcs = giant_component_size(g)
component_label = ("biggest strong component size"
                   if g.is_directed() else "giant component size")
print(f"{component_label} = {gcs}")

# --- Degree ---
vertex_ids = g.get_vertices()
degrees = g.get_total_degrees(vertex_ids)
Esempio n. 8
0
def gb_clus_coef(g):
    """Return the global clustering coefficient of ``g``.

    ``gt.global_clustering`` returns a tuple; only its first element is
    returned here.
    """
    # The plotting/printing statements that used to follow this return were
    # unreachable and have been removed.
    return gt.global_clustering(g)[0]

#-- Clustering Coefficients (Global) - of Friendship Network --#
# The flag is tested for truthiness rather than compared with `== True`;
# it is expected to be a real bool.
if descCCG_bool:

    print("\nDesciptives: Global Clustering Coefficiants \n")

    # global_clustering returns (coefficient, standard deviation); the
    # trailing comments are presumably values observed in an earlier run.
    print('Global Clustering Coefficiants - Friendship Network: ',
          gt.global_clustering(
              g_friend))  # 0.101803664507653, 0.013292495836611278
    print('Global Clustering Coefficiants - Largest Component: ',
          gt.global_clustering(
              g_friend_LC))  # 0.10158343197387348, 0.013265280343602718

    print("\nDesciptives: Global Clustering Coefficiants - done\n")

### Descriptives Friendship Network - Largest Component specific ###

#-- (Pseudo-) Diameter of Largest Component --#

# Print the section banner when the pseudo-diameter descriptives are enabled.
if descDia_bool == True:

    print(
        "\n\nDeskriptives Friendship Network - Largest Component specific - (Pseudo-) Diameter\n"
    )
else:
    # NOTE(review): this branch and the `data`/`hist` code below do not match
    # the descriptives script above; it looks like the tail of a different,
    # truncated snippet. As written, size1/size2 are assigned only when
    # descDia_bool is not True -- confirm they are defined elsewhere before
    # the append below runs.
    # Uses the last entry of `hist` as the first size and 0 as the second
    # (presumably the single-component case -- TODO confirm).
    size1, size2 = hist[-1], 0
# Record both sizes together with their share of all vertices in percent.
data.append(('size of 1st/2nd component',
             '{} ({:.2f}%), {}/({:.2f}%)'.format(
                 size1, 100 * size1 / g.num_vertices(),
                 size2, 100 * size2 / g.num_vertices())))

# Degree extremes and mean, read from the array view of the `deg` map.
degree_values = deg.a
degree_text = '{}/{}/{:.2f}'.format(
    int(degree_values.min()),
    int(degree_values.max()),
    float(degree_values.mean()),
)
data.append(('min/max/avg degree', degree_text))

# Density: twice the edge count over N * (N - 1).
vertex_count = g.num_vertices()
graph_density = 2 * g.num_edges() / vertex_count / (vertex_count - 1)
data.append(('density', '{:.7f}'.format(graph_density)))

# global_clustering's result is unpacked straight into the two placeholders.
clustering_text = '{:.2f} ({:.2f})'.format(*global_clustering(g))
data.append(('clustering coefficient (std)', clustering_text))

# Pseudo-diameter: largest value over up to 100 randomly chosen sources.
sampled_sources = np.random.permutation(g.num_vertices())[:100]
source_distances = [pseudo_diameter(g, source)[0]
                    for source in sampled_sources]
dist = np.max(source_distances)
data.append(('pseudo diameter', dist))

assortativity_text = '{:.2f} ({:.2f})'.format(*assortativity(g, 'total'))
data.append(('assortativity (std)', assortativity_text))

# Turn the (label, value) pairs into a labelled Series and print it.
labels, values = zip(*data)
s = pd.Series(values, index=labels)
print(s.to_string())

# Register the vertex property maps on the graph under fixed names.
g.vertex_properties["color_bytype"] = color_bytype
g.vertex_properties["subgraph"] = subgraph
g.vertex_properties["shape"] = shape

# Add one edge per row of `egdays`; columns 0, 1 and 2 hold the source
# index, the target index and the link type. Rows are iterated directly
# instead of through an index loop.
for s, t, link in egdays[:, [0, 1, 2]]:
    edge = g.add_edge(g.vertex(int(s)), g.vertex(int(t)))
    edge_color[edge] = color_edge_dict[link]
    edge_label[edge] = link
    edge_width[edge] = width_edge_dict[link]

# Register the edge property maps filled in above.
g.edge_properties["edge_color"] = edge_color
g.edge_properties["edge_label"] = edge_label
g.edge_properties["edge_width"] = edge_width

print('global_clustering', gt.global_clustering(g))
print('assortativity out', gt.assortativity(g, "out"))  # correlations
print('assortativity in', gt.assortativity(g, "in"))
print('assortativity total', gt.assortativity(g, "total"))

g = gt.GraphView(g)

print('start drawing')

pos_fr = gt.fruchterman_reingold_layout(g, n_iter=1000)
g.vertex_properties["pos_fr"] = pos_fr

# Per-edge values derived from the layout: d is one fifth of the Euclidean
# distance between the endpoints' positions (presumably used as curve
# control points when drawing -- confirm against the draw call).
control = g.new_edge_property("vector<double>")
for e in g.edges():
    d = sqrt(sum((pos_fr[e.source()].a - pos_fr[e.target()].a)**2)) / 5
    control[e] = [0.3, d, 0.7, d]
g.edge_properties["control"] = control