Exemple #1
0
def calculate_centrality(graph):
    """Compute closeness and betweenness centrality (unweighted and
    distance-weighted), dump the raw per-vertex values to text files, and
    write a combined per-vertex table to ``results.csv``.

    Args:
        graph: a graph_tool Graph with vertex properties ``name``, ``Type``,
            ``Longitude``, ``Latitude`` and an edge property ``Distance``.
    """
    cl_unweighted = gt.closeness(graph)
    cl_distance = gt.closeness(graph, weight=graph.ep.Distance)
    # gt.betweenness returns (vertex map, edge map); the edge maps are unused.
    bt_unweighted, _ = gt.betweenness(graph)
    bt_distance, _ = gt.betweenness(graph, weight=graph.ep.Distance)

    # One value per line, in vertex-index order.  A single context-managed
    # 'w' open replaces the original open('w+') immediately followed by
    # open('r+'), which leaked both file handles per file.
    dumps = [
        ('cl_unweighted.txt', cl_unweighted),
        ('cl_distance.txt', cl_distance),
        ('bt_unweighted.txt', bt_unweighted),
        ('bt_distance.txt', bt_distance),
    ]
    for path, prop in dumps:
        with open(path, 'w') as f:
            f.writelines(["%s\n" % item for item in prop.a])

    # newline='' is the documented way to feed csv.writer in Python 3; the
    # original 'wb' mode only worked on Python 2.
    with open('results.csv', 'w', newline='') as results:
        writer = csv.writer(results, delimiter=',')
        header = [
            'Name', 'Type', 'Longitude', 'Latitude', 'Closeness_Unweighted',
            'Closeness_Distance', 'Betweenness_Unweighted',
            'Betweenness_Distance'
        ]
        writer.writerow(header)
        for v in graph.vertices():
            row = [
                graph.vp.name[v], graph.vp.Type[v], graph.vp.Longitude[v],
                graph.vp.Latitude[v], cl_unweighted[v], cl_distance[v],
                bt_unweighted[v], bt_distance[v]
            ]
            writer.writerow(row)
Exemple #2
0
 def generate_report(self, analysis_type, out_file):
     """Write one ``<label> <value>`` line per vertex of ``self.G`` to
     *out_file* for the requested centrality measure.

     Args:
         analysis_type: one of "total_degree", "in_degree", "out_degree",
             "closeness" or "betweenness".
         out_file: path of the report file to create.
     """
     if type(out_file) != str or out_file == "":
         print("Invalid output path.")
         exit(1)
     data = None
     # Fix: the degree branches were empty (``pass``) and the loop below
     # printed the undefined name ``vp``, so every analysis type except
     # "betweenness" raised at runtime.  All branches now fill ``data``.
     if analysis_type == "total_degree":
         data = self.G.degree_property_map("total")
     elif analysis_type == "in_degree":
         data = self.G.degree_property_map("in")
     elif analysis_type == "out_degree":
         data = self.G.degree_property_map("out")
     elif analysis_type == "closeness":
         data = gt.closeness(self.G, weight=self.e_weights)
     elif analysis_type == "betweenness":
         # betweenness returns (vertex map, edge map); keep the vertex map.
         data, _ = gt.betweenness(self.G, weight=self.e_weights)
     else:
         print('Invalid analysis type, select from:')
         print('total_degree')
         print('in_degree')
         print('out_degree')
         print('closeness')
         print('betweenness')
         exit(1)
     #data = dict(sorted(data.items(), key=operator.itemgetter(1), reverse=True))
     # Context manager guarantees the report file is closed.
     with open(out_file, 'w') as f:
         for v in self.G.vertices():
             print(self.v_labels[v], data[v], file=f)
Exemple #3
0
def random_halves(g):
    """Split *g* into random halves ten times and pickle the vertex
    betweenness of each half (Python 2 script; relies on the module-level
    ``dataset`` name and the get_first/get_second/split_graph/pickle_result
    helpers).
    """
    # Ten independent random splits, numbered 1..10.
    for i in range(1,11):
        num = str(i)
        # Vertex partitions for split number ``num``.
        first = get_first(dataset,num)
        second = get_second(dataset,num)
        g1,g2 = split_graph(g,first,second)
        
        print "Calculating betweenness for first" + str(i) + "..."
        # Only the vertex betweenness map is kept; the edge map is discarded.
        vp1,_ = gt.betweenness(g1)
        print "Done calculating betweenness!"
        pickle_result(vp1,postfix="between_first" + str(i) + "-" + dataset)
        
        print "Calculating betweenness for second" + str(i) + "..."
        vp2,_ = gt.betweenness(g2)
        print "Done calculating betweenness!"
        pickle_result(vp2,postfix="between_second" + str(i) + "-" + dataset)
Exemple #4
0
def graph_measures(graph: gt.Graph) -> pd.DataFrame:
    """Collect per-vertex text attributes and centrality measures into a
    DataFrame.

    ``tp_`` columns hold text properties (group/author names); ``tn_``
    columns hold numeric scores.  Missing values are replaced with 0.
    """
    _, vp_authority, vp_hub = gt.hits(graph)

    # Property map feeding each output column, in column order.
    props = {
        'tp_group': graph.vp.group_name,
        'tp_author': graph.vp.username,
        'tn_degree_in': graph.degree_property_map('in'),
        'tn_degree_out': graph.degree_property_map('out'),
        'tn_degree_total': graph.degree_property_map('total'),
        'tn_pagerank': gt.pagerank(graph),
        'tn_betweenness': gt.betweenness(graph)[0],
        'tn_closeness': gt.closeness(graph),
        'tn_eigenvector': gt.eigenvector(graph)[1],
        'tn_authority': vp_authority,
        'tn_hub': vp_hub,
        'tn_lcc': gt.local_clustering(graph),
    }

    # One list of values per column, filled vertex by vertex.
    columns = {key: [] for key in props}
    for v in graph.vertices():
        for key, prop in props.items():
            columns[key].append(prop[v])

    return pd.DataFrame(columns).fillna(0)
def centralities(g, user_map):
    """Use graph_tool to calculate 7 centralities."""
    # Unweighted and weighted in/out degrees.
    in_deg = g.degree_property_map('in')
    out_deg = g.degree_property_map('out')
    w_in_deg = g.degree_property_map('in', weight=g.ep['weight'])
    w_out_deg = g.degree_property_map('out', weight=g.ep['weight'])
    # Spectral / path-based centralities.
    page_rank = gt.pagerank(g)
    btw_vertex, btw_edge = gt.betweenness(g)
    eig_value, eig_vertex = gt.eigenvector(g)
    # Map raw vertex ids back to user screen names.
    names = user_map.loc[g.vp['raw_id'].a.copy()].values
    columns = {
        'screen_name': names,
        'in_degree': in_deg.a,
        'out_degree': out_deg.a,
        'weighted_in_degree': w_in_deg.a,
        'weighted_out_degree': w_out_deg.a,
        'page_rank': page_rank.a,
        'betweenness': btw_vertex.a,
        'eigenvector': eig_vertex.a,
    }
    df = pd.DataFrame(columns)
    df.to_csv('centralities.raw.csv')
Exemple #6
0
def drawMST(mst, outPrefix, isolate_clustering, clustering_name, overwrite):
    """Plot a layout of the minimum spanning tree

    Args:
        mst (graph_tool.Graph)
            A minimum spanning tree
        outPrefix (str)
            Output prefix for save files
        isolate_clustering (dict)
            Dictionary of ID: cluster, used for colouring vertices
        clustering_name (str)
            Name of clustering scheme to be used for colouring
        overwrite (bool)
            Overwrite existing output files
    """
    import graph_tool.all as gt
    # Two output images: a stress plot styled by degree/betweenness, and a
    # cluster plot with vertices coloured by cluster membership.
    graph1_file_name = outPrefix + "/" + os.path.basename(
        outPrefix) + "_mst_stress_plot.png"
    graph2_file_name = outPrefix + "/" + os.path.basename(
        outPrefix) + "_mst_cluster_plot.png"
    # Skip all work when both images exist and overwrite is off.
    if overwrite or not os.path.isfile(graph1_file_name) or not os.path.isfile(
            graph2_file_name):
        sys.stderr.write("Drawing MST\n")
        # One shared layout so both plots are directly comparable.
        pos = gt.sfdp_layout(mst)
        if overwrite or not os.path.isfile(graph1_file_name):
            # Vertex size ~ sqrt(total degree); edge colour/width ~ edge
            # betweenness rescaled so its maximum is 50.
            deg = mst.degree_property_map("total")
            deg.a = 4 * (np.sqrt(deg.a) * 0.5 + 0.4)
            ebet = gt.betweenness(mst)[1]
            ebet.a /= ebet.a.max() / 50.
            # Negated copy used as the edge draw order (eorder), so draw
            # order follows betweenness.
            eorder = ebet.copy()
            eorder.a *= -1
            gt.graph_draw(mst,
                          pos=pos,
                          vertex_size=gt.prop_to_size(deg, mi=20, ma=50),
                          vertex_fill_color=deg,
                          vorder=deg,
                          edge_color=ebet,
                          eorder=eorder,
                          edge_pen_width=ebet,
                          output=graph1_file_name,
                          output_size=(3000, 3000))
        if overwrite or not os.path.isfile(graph2_file_name):
            # Random RGB colour (alpha 0.9) per cluster.
            cluster_fill = {}
            for cluster in set(isolate_clustering[clustering_name].values()):
                cluster_fill[cluster] = list(np.random.rand(3)) + [0.9]
            plot_color = mst.new_vertex_property('vector<double>')
            mst.vertex_properties['plot_color'] = plot_color
            # Look each vertex's cluster up by its ``id`` vertex property.
            for v in mst.vertices():
                plot_color[v] = cluster_fill[
                    isolate_clustering[clustering_name][mst.vp.id[v]]]

            gt.graph_draw(
                mst,
                pos=pos,
                vertex_fill_color=mst.vertex_properties['plot_color'],
                output=graph2_file_name,
                output_size=(3000, 3000))
def betweenness(rankCommands, Graph, conn, cur):
    """Rank vertices by betweenness centrality and persist the ranking via
    createTable (Python 2 code: print statements)."""
    gt.openmp_set_num_threads(4) #enable 4 threads for runing algorithm
    before_time = time.time()
    vp = gt.betweenness(Graph.g)[0] #betweenness returns two property map (vertex map and edge map) [0] means use vertex map
    values = vp.get_array()
    # Map each external vertex id to its betweenness score.
    idBt = dict()
    for each in Graph.g.vertices():
        idBt[Graph.indexIdDict[each]] = values[each]
    print "Total handling time is: ", (time.time() - before_time)
    # Ids sorted by descending betweenness.
    slist = sorted(idBt, key = lambda key: idBt[key], reverse = True)
    createTable(rankCommands, slist, idBt, conn, cur)
Exemple #8
0
def betweenness(rankCommands, Graph, conn, cur):
    """Rank vertices by betweenness centrality and persist the ranking via
    createTable (Python 2 code; duplicate of the snippet above with
    different formatting)."""
    gt.openmp_set_num_threads(4)  #enable 4 threads for runing algorithm
    before_time = time.time()
    vp = gt.betweenness(
        Graph.g
    )[0]  #betweenness returns two property map (vertex map and edge map) [0] means use vertex map
    values = vp.get_array()
    # Map each external vertex id to its betweenness score.
    idBt = dict()
    for each in Graph.g.vertices():
        idBt[Graph.indexIdDict[each]] = values[each]
    print "Total handling time is: ", (time.time() - before_time)
    # Ids sorted by descending betweenness.
    slist = sorted(idBt, key=lambda key: idBt[key], reverse=True)
    createTable(rankCommands, slist, idBt, conn, cur)
Exemple #9
0
def _refinement(graph, threshold):
    """Drop betweenness outliers and return the surviving non-trivial
    connected components as pruned undirected graphs.

    Outliers are found with the modified-z-score rule: a vertex is removed
    when its betweenness deviates from the median by more than *threshold*
    times the median absolute deviation.
    """
    betweenness = gt.betweenness(graph)[0].get_array()

    # Deviation from the median, scaled by the MAD (zero-safe).
    deviation = np.abs(betweenness - np.median(betweenness))
    mad = np.median(deviation)
    score = deviation / mad if mad else np.zeros_like(deviation)
    keep = score < threshold

    graph = gt.GraphView(graph, vfilt=keep)
    components, sizes = gt.label_components(graph)
    result = []
    # Keep only components with more than one vertex, pruned into
    # standalone undirected graphs.
    for idx in range(len(sizes)):
        if sizes[idx] > 1:
            view = gt.GraphView(graph, vfilt=(components.a == idx))
            result.append(gt.Graph(view, prune=True, directed=False))
    return result
Exemple #10
0
def read_graph_tool(filename, rate, length):
    """Read graph from file and construct features for cnn

    The file is a whitespace-separated edge list, one edge per line.
    Fix: the original opened the file twice in 'rb' mode without closing
    either handle, and split the resulting bytes with a str separator
    (a TypeError on Python 3).  The file is now read once, in text mode,
    inside a context manager.
    """
    with open(filename) as f:
        lines = [line.strip().split(' ') for line in f]

    # Assign a dense integer id to every distinct node label.
    node2id = dict()
    for seg in lines:
        for v in seg:
            if v not in node2id:
                node2id[v] = len(node2id)
    n = len(node2id)

    g = gt.Graph(directed=False)
    g.add_vertex(n)
    for seg in lines:
        g.add_edge(g.vertex(node2id[seg[0]]), g.vertex(node2id[seg[1]]))

    num = int(rate * n)  # number of vertices implied by the sample rate
    vp, ep = gt.betweenness(g)
    # Reverse mapping: integer id back to original node label.
    id2node = dict(zip(node2id.values(), node2id.keys()))
def g_centrality_correlations(g):
    """Correlate normalised total degree with betweenness, closeness and
    eigenvector centrality using Pearson, Spearman and Kendall statistics.

    Returns a dict keyed ``d{b,c,e}_{p,s,k}`` of (statistic, p-value) pairs.
    """
    degree = g.degree_property_map('total').a
    # Normalise degree by the maximum possible number of neighbours.
    degree = degree / (g.num_vertices() - 1)
    between = gt.betweenness(g, norm=True)[0].a
    close = gt.closeness(g, norm=True, harmonic=False).a
    eigen = gt.eigenvector(g)[1].a
    return {
        'db_p': stats.pearsonr(degree, between),
        'dc_p': stats.pearsonr(degree, close),
        'de_p': stats.pearsonr(degree, eigen),
        'db_s': stats.spearmanr(degree, between),
        'dc_s': stats.spearmanr(degree, close),
        'de_s': stats.spearmanr(degree, eigen),
        'db_k': stats.kendalltau(degree, between),
        'dc_k': stats.kendalltau(degree, close),
        'de_k': stats.kendalltau(degree, eigen),
    }
Exemple #12
0
def show_hvgraph(dofs, isinc, wn):
    """Build the half-vortex pairing graph and display it interactively,
    with vertex size and fill colour driven by in-degree."""
    clp, clm = get_halfvortex_clusters(dofs, wn, isinc)
    # One edge per (plus-cluster, minus-cluster) pair.
    edgelist = list(zip(clp, clm))
    g = gt.Graph()
    # hashed=True: endpoints are arbitrary labels; returns a name map.
    names = g.add_edge_list(edgelist, hashed=True)
    # NOTE(review): ebet is computed but never used below — confirm whether
    # edge betweenness was meant to drive edge width/colour.
    ebet = gt.betweenness(g)[1]
    deg = g.degree_property_map("in")
    deg2 = g.degree_property_map("in")
    # Rescale in-degree into visually useful vertex sizes.
    deg.a = 8 * deg.a**0.4
    pos = gt.sfdp_layout(g)
    gt.graph_draw(g,
                  pos=pos,
                  edge_pen_width=1,
                  vertex_size=deg,
                  edge_end_marker="none",
                  edge_mid_marker="arrow",
                  edge_marker_size=6,
                  vertex_fill_color=deg2,
                  update_layout=True,
                  sync=False,
                  display_props=[names],
                  display_props_size=20)
Exemple #13
0
    utils.pickleIt((json_full,json_tendrils),mysavepath+'.pk1')
    json_dump = json.dumps(json_full,sort_keys=True,indent=4, separators=(',', ': '))
    with open(mysavepath+'.json','w+') as file:
        file.write(json_dump)
        file.flush()
#-------------------------------------------------------------------------------
#-------------------------------------------------------------------------------
#-------------------------------------------------------------------------------

directed = {}
directed['D'] = True
#directed['U'] = False

centralities = {}
#centralities['pagerank'] = gt.pagerank
centralities['betweenness'] = lambda g: gt.betweenness(g)[0]
#centralities['closeness'] = gt.closeness
#centralities['katz'] = gt.katz
#centralities['hits'] = lambda g: gt.hits(g)[2]

exclusions = [['U','katz']]

times = []

for dname,d in directed.iteritems():
    for cname,centrality in centralities.iteritems():
        if [dname,cname] not in exclusions:
            start = time.time()
            mysavepath = "%s_%s%s" % (savepath,dname,cname)

            print '------------------%s %s------------------' % (dname,cname)
Exemple #14
0
        #pickle.dump(closeness,f)
    print "vpa pickled!"


# Python 2 script: compute vertex betweenness of ten first/second random
# graph splits of the co-citation network and pickle each result.
g = gt.load_graph("co-citation-AAN.graphml")
print "Loaded a graph with " + str(g.num_vertices()) + " nodes and " + str(
    g.num_edges()) + " edges."

for i in range(1, 11):
    num = str(i)
    # Vertex partitions for split number ``num``.
    first = get_first(num)
    second = get_second(num)
    g1, g2 = get_gt_graphs(g, first, second)

    print "Calculating betweenness for first" + str(i) + "..."
    # Keep only the vertex betweenness map; the edge map is discarded.
    vp1, _ = gt.betweenness(g1)
    print "Done calculating betweenness!"
    pickle_result(vp1, name="between_first" + str(i))

    print "Calculating betweenness for second" + str(i) + "..."
    vp2, _ = gt.betweenness(g2)
    print "Done calculating betweenness!"
    pickle_result(vp2, name="between_second" + str(i))

#with open("vpa-between.pickle","rb") as f:
#vpa = np.asarray(pickle.load(f))

#ids = g.vertex_properties["_graphml_vertex_id"]
# write betweenness along with corresponding id to a csv file
#g_cg = gt.load_graph("APS.graphml") # load the original citation graph
#with open("betweenness.csv","w+") as csv:
Exemple #15
0
# Weighted-betweenness drawing script for the network passed on the CLI.
filename = 'betweeness'

# check that everything is as it "should be"
print('chequeando...', args.file)
print('vertices', g.num_vertices())  # number of vertices
print('edges', g.num_edges())  # number of links

weight = g.ep['weight']
width = gt.prop_to_size(weight, ma=.5)

# set up some of the arguments used when drawing the graph

pos = g.vp['pos_sfdp_infomap']

# Weighted vertex/edge betweenness; edge values rescaled so their max is 10.
vbet, ebet = gt.betweenness(g, weight=weight)
ebet.a /= ebet.a.max() / 10.
# Negated copy used as edge draw order.
eorder = ebet.copy()
eorder.a *= -1
vsize = gt.prop_to_size(vbet)
vorder = -vsize.a

# Show the top-15 vertices by betweenness.
df = pd.DataFrame({'node': list(g.vertices()), 'betweeness': list(vbet)})
df.sort_values(by='betweeness', inplace=True, ascending=False)
print(df.head(15))

print('drawing...')
# draw the graph into the file filename.png
gt.graph_draw(
    g,
    pos,
    # Screenshot added to Github (less loading time)
    gt.graph_draw(g_friend_LC,
                  pos=None,
                  vertex_fill_color=vprop_closeness,
                  vertex_size=gt.prop_to_size(vprop_closeness, mi=5, ma=15),
                  vcmap=plt.cm.gist_heat,
                  vorder=vprop_closeness,
                  output="closeness_g_friend_LC_ap1.pdf")

#-- Betweenness Distribution of Largest Component --#

if descBetw_bool == True:

    print("\n\n#-- Betweenness Distribution --#\n")

    # Store both betweenness maps as internal graph properties.
    vprop_betweenness, eprop_betweenness = gt.betweenness(g_friend_LC)
    g_friend_LC.vp.v_betweenness = vprop_betweenness
    g_friend_LC.ep.e_betweenness = eprop_betweenness

    # Restrict vertex values to the largest component's indices.
    v_between_array = np.array(vprop_betweenness.a)
    v_between_array_LC = v_between_array[close_array_index_LC]

    # Summary statistics (recorded values from a past run in comments).
    print("Avg Vertex Betweenness Centrality: ",
          sum(v_between_array_LC) /
          len(v_between_array_LC))  # 0.000163803122932315
    print("Median Betweenness Centrality: ",
          np.median(v_between_array_LC))  # 0.0
    print("Mode Betweenness Centrality: ",
          stats.mode(v_between_array_LC))  # 0.0
    plt.hist(
def process(name, g):
    """Render *g* once per (vertex property, colormap) combination and dump
    a D3-style node/link JSON file named ``{name}.json``."""
    # Layout and per-vertex properties.
    vp_pos = gt.sfdp_layout(g)
    vp_deg = g.degree_property_map('total')
    vp_deg_log = g.new_vp('double')
    vp_deg_log.a = np.log10(vp_deg.a)
    vp_cls = gt.closeness(g)
    vp_page = gt.pagerank(g)
    vp_btw, ep_btw = gt.betweenness(g, norm=False)

    # Colormaps — one image per (colormap, property) pair, in this order.
    colorable = [
        ('deg', vp_deg),
        ('deg_log', vp_deg_log),
        ('cls', vp_cls),
        ('page', vp_page),
        ('btw', vp_btw),
    ]
    cmaps = [
        'viridis', 'plasma', 'inferno', 'YlGnBu', 'Blues', 'Greys',
        'Greens', 'Oranges'
    ]
    for cmap in cmaps:
        for prop_name, prop in colorable:
            draw_graph(g,
                       vp_pos,
                       f'{name}.prop={prop_name}.color={cmap}.png',
                       vp_color=prop,
                       vcmap=cmap)

    # D3-style node records: layout position plus every numeric property.
    nodes = []
    for u in g.vertices():
        p = vp_pos[u]
        nodes.append({
            'x': p[0],
            'y': p[1],
            'deg': vp_deg[u],
            'deg_log': vp_deg_log[u],
            'cls': vp_cls[u],
            'page': vp_page[u],
            'btw': vp_btw[u],
        })

    # D3-style links keyed by vertex index.
    vp_idx = g.vertex_index
    links = [{
        'source': vp_idx[e.source()],
        'target': vp_idx[e.target()],
    } for e in g.edges()]

    # Save D3 style JSON
    with open(f'{name}.json', 'w') as f:
        json.dump({'nodes': nodes, 'links': links}, f)
Exemple #18
0
import os
import statistics as stats
import graph_tool.all as gt

os.chdir("/home/jen/Documents/School/GradSchool/Thesis/Images/")

# Two toy example graphs: one linked, one branching topology.
g_link = gt.load_graph("Examples/ToyLinked.xml.gz")
g_bran = gt.load_graph("Examples/ToyBranching.xml.gz")

#Misc Stats
# Mean and standard deviation of total vertex degree for each graph.
link_deg_avg, link_deg_std = gt.vertex_average(g_link, deg="total")
bran_deg_avg, bran_deg_std = gt.vertex_average(g_bran, deg="total")

#Centrality
# Per-vertex betweenness for both graphs (edge maps kept but unused below).
vp_btwn_link, ep_btwn_link = gt.betweenness(g_link)
link_btwn = [vp_btwn_link[v] for v in g_link.vertices()]
vp_btwn_bran, ep_btwn_bran = gt.betweenness(g_bran)
bran_btwn = [vp_btwn_bran[v] for v in g_bran.vertices()]

link_btwn_avg = stats.mean(link_btwn)
link_btwn_std = stats.stdev(link_btwn)

bran_btwn_avg = stats.mean(bran_btwn)
bran_btwn_std = stats.stdev(bran_btwn)

#Cost and efficiency
link_mst = gt.min_spanning_tree(g_link)
bran_mst = gt.min_spanning_tree(g_bran)
# Flatten the all-pairs shortest-distance vectors into single lists.
link_shortest = [x for vector in gt.shortest_distance(g_link) for x in vector]
bran_shortest = [x for vector in gt.shortest_distance(g_bran) for x in vector]
Exemple #19
0
#------------------------------------------------------
#Variables:
# Input network path and feature-annotated output path (to be filled in).
NETWORK_FILE = ""
NETWORK_FEATURE_FILE = ""
#------------------------------------------------------

#import the graph
g = gt.load_graph(NETWORK_FILE)
#calculate the features using inbuilt graph_tool functions

#Pagerank
rank = gt.pagerank(g)
print("pagerank has been calculated")

#HITS y-hubs and x-authorities
eigenvalue, xauthorities, yhubs = gt.hits(g)
print("HITS values have been calculated")

#betweenness centrality
# Vertex and edge betweenness; only the vertex map is saved below.
between_vp, between_ep = gt.betweenness(g)
print("betweenness centrality has been calculated")

#save external to internal property map
#this makes the features accessible in the future when loading the graph
g.vertex_properties["page_rank"] = rank
g.vertex_properties["x_authorities"] = xauthorities
g.vertex_properties["y_hubs"] = yhubs
g.vertex_properties["b_cent"] = between_vp

g.save(NETWORK_FEATURE_FILE, fmt='gt')
Exemple #20
0
# Build an undirected weighted graph from the prepared edge list and rank
# vertices by weighted betweenness.
edgeNpArr = np.array(edge_list)
print(edgeNpArr)

g = gt.Graph(directed=False)
e_weight = g.new_edge_property("float")

# hashed=True: endpoints are arbitrary labels, not vertex indices.
g.add_edge_list(edgeNpArr, hashed=True)
e_weight.a = weight_list
#vp, ep = gt.centrality.betweenness(g)
print(g.list_properties())
for e in g.edges():
    print(e)

for v in g.vertices():
    print(v)
# Weighted betweenness; vertex map attached to the graph, edge map unused.
vprop, ep = gt.betweenness(g, weight=e_weight)

g.vp.bet = vprop
for v in g.vertices():
    print(v, g.vp.bet[v])
"""if len(G) != 0:

	degree = nx.degree_centrality(G)
	for node in degree:
		asso_dict[node] = [str(degree[node])]

	betweenness = nx.betweenness_centrality(G)
	for node in betweenness:
		asso_dict[node].append(str(betweenness[node]))

	closeness = nx.closeness_centrality(G)
            common_votes = sum([ dep1[idx] == dep2[idx] for idx in range(5, len(data[1])) ])

            weight_map[e] = 1. * common_votes / len(dep1[5:])
            edges[(dep1[4],dep2[4])] = [weight_map[e], dep1, dep2] # adds for debuging

        except Exception, e:
            print str(e)


# conventional centrality analysis
# (Python 2 script: zip() below is fed directly to DataFrame)

# degree 
degree = g.degree_property_map('total', weight = weight_map)

# vertice betweeness
# gt.betweenness returns (vertex map, edge map); indexed as betweeness[0] below.
betweeness = gt.betweenness(g, weight = weight_map)

# closeness
closeness = gt.closeness(g, weight = weight_map)

# Katz
katz = gt.katz(g, weight = weight_map)

# Pagerank
pagerank = gt.pagerank(g, weight = weight_map)

# One row per vertex: name, diameter-related value, then the centralities.
metrics = ['name', 'diap', 'betweenness', 'closeness', 'degree', 'katz', 'pagerank']
df = pd.DataFrame(zip(vertex_to_name.values(), diap, degree.a.tolist(), betweeness[0].a.tolist(), closeness.a.tolist(), katz.a.tolist(), 
                      pagerank.a.tolist()), columns = metrics)
            v_prop[v1] = ind1
            v_prop[v2] = ind2
    return g









# Build the graph from the spreadsheet, then derive drawing properties:
# vertex size ~ sqrt(in-degree), edge colour ~ edge betweenness.
g = form_graph('./Nikite.xlsx')
deg = g.degree_property_map("in")
deg.a = 4 * (np.sqrt(deg.a) * 0.5 + 0.4)
ebet = gt.betweenness(g)[1]
# Rescale edge betweenness so its maximum becomes 10.
ebet.a /= ebet.a.max() / 10.
# Negated copy used as edge draw order.
eorder = ebet.copy()
eorder.a *= -1
pos = gt.sfdp_layout(g)
# Bezier control points: curvature proportional to edge length.
control = g.new_edge_property("vector<double>")
for e in g.edges():
    d = np.sqrt(sum((pos[e.source()].a - pos[e.target()].a) ** 2)) / 5
    control[e] = [0.3, d, 0.7, d]
gt.graph_draw(g,
              pos=pos,
              vertex_size=deg,
              vertex_fill_color=deg,
              vorder=deg,
              edge_color=ebet,
              eorder=eorder,
Exemple #23
0
def betweenness_centrality(g):
    """Return the vertex betweenness values of *g* as a numpy array."""
    vertex_map, _ = gt.betweenness(g)
    return vertex_map.get_array()
Exemple #24
0
# Python 2 script: Delaunay-triangulate the (lat, lon) points, weight edges
# by Euclidean length, compute weighted betweenness, and draw the result.
lonc = data.variables['lonc'][:]

ppoints = np.vstack((latc,lonc)).T
print 'Loaded'

tri, pos = gt.triangulation(ppoints, type="delaunay")
print 'Done Triangulation'

weight = tri.new_edge_property("double")

# Edge weight = Euclidean distance between endpoint positions.
for e in tri.edges():
    weight[e] = np.sqrt(sum((np.array(pos[e.source()]) - np.array(pos[e.target()]))**2))

print 'Done weighting'

# b = (vertex betweenness map, edge betweenness map).
b = gt.betweenness(tri, weight=weight)
# Scale edge betweenness up so pen widths are visible.
b[1].a *= 120

dist = gt.shortest_distance(tri,tri.vertex(0),tri.vertex(5),weights=weight)
path, elist = gt.shortest_path(tri,tri.vertex(0),tri.vertex(5))

print 'Done shortest distance and path'
print 'dist'
print dist
print 'path'
for i in path:
    print i


gt.graph_draw(tri, vertex_text=tri.vertex_index, edge_text=tri.edge_index,
              edge_pen_width=b[1], output_size=(1000,1000), output="triang.pdf")
Exemple #25
0
"""
Calculates betweenness centrality for a co-citation network and prints info and plots the top results
"""

#num_top = 100

g = gt.load_graph("co-citation-AAN.graphml")
g_cg = gt.load_graph("AAN.graphml") # load the original citation graph
g.set_directed(False)
titles = g.vertex_properties["title"]
authors = g.vertex_properties["authors"]
in_degs = g.degree_property_map("in")
print "Loaded a graph with " + str(g.num_vertices()) + " nodes"
#g = gt.GraphView(g, vfilt=gt.label_largest_component(g))

vp, ep = gt.betweenness(g)
#vp = gt.closeness(g)

# TODO: find out if we can pickle betweenness scores with correct indexes
# and then just load that array of floats
betweens = []
for b in vp.a:
    betweens.append(b)


#closeness = []
#for c in vp.a:
    #closeness.append(c)
with open("vpa-betweenness.pickle","wb") as f:
    pickle.dump(betweens,f)
    #pickle.dump(closeness,f)
Exemple #26
0
print(list(G.ep.keys()))

# In[9]:


# analyze betweenness centrality for generic vs. other genes
def betweenness_to_df(G, v_bw):
    """Tabulate per-gene betweenness alongside the is_generic flag.

    One row per vertex of *G*, with the gene name, its betweenness value
    from *v_bw*, and whether the gene is generic.
    """
    vertices = G.get_vertices()
    records = {
        'gene': [G.vp['name'][v] for v in vertices],
        'betweenness': [v_bw[v] for v in vertices],
        'is_generic': [G.vp['is_generic'][v] for v in vertices],
    }
    return pd.DataFrame(records)


# Distance-weighted vertex betweenness; the edge map is discarded.
v_bw, _ = gt.betweenness(G, weight=G.ep['distance'])
bw_df = betweenness_to_df(G, v_bw)
bw_df.head()

# In[10]:

# Compare the betweenness distributions of generic vs. other genes.
sns.set({'figure.figsize': (12, 4)})
sns.set_style('whitegrid')
fig, axarr = plt.subplots(1, 2)
sns.histplot(data=bw_df,
             x='betweenness',
             hue='is_generic',
             element='step',
             stat='probability',
             common_norm=False,
             ax=axarr[0])
Exemple #27
0
 def weigh_edge_betweeness(self, scaling_factor=5):
     """Compute betweenness of ``self.g``, storing the vertex map on
     ``self.bv`` and the edge map on ``self.be``; the edge values are then
     rescaled so their maximum equals *scaling_factor*."""
     self.bv, self.be = gt.betweenness(self.g)
     # Dividing by (max / factor) makes max(self.be.a) == scaling_factor.
     self.be.a /= self.be.a.max() / scaling_factor
def compute_and_save_betweenness(g, filename):
    """Attach vertex betweenness to *g* as the internal property
    "betweenness" and save the graph to *filename*."""
    vertex_bw, _ = gt.betweenness(g)
    g.vertex_properties["betweenness"] = vertex_bw
    g.save(filename)
# Build company vertices/edges from the company-company triangle matrix,
# then use weighted betweenness to inspect and filter the graph.
company_count = len(comp_comp_tri)

# Label each vertex with its company name (Python 2 iterator ``v.next()``).
for i in range(0, company_count):
    v_company[v.next()] = comp_tag_table.index[i]

g.vertex_properties['company'] = v_company

# Add one weighted edge per positive off-diagonal matrix entry.
for i in range(0, len(comp_comp_tri)):
    for j in range(0, len(comp_comp_tri)):
        if i != j and comp_comp_tri[i, j] > 0:
            e = g.add_edge(g.vertex(i), g.vertex(j))
            e_weight[e] = comp_comp_tri[i, j]

g.edge_properties['weight'] = e_weight


# to filter the graph a bit
# Fix: these lines carried a stray one-space indent in the original,
# which is an IndentationError at module level; dedented.
v_bet, e_bet = gt.betweenness(g, weight=g.edge_properties['weight'])
pos, it = gt.graph_draw(g, vertex_fill_color=v_bet,
    vertex_size=gt.prop_to_size(v_bet, mi=2, ma=15),
    edge_pen_width=gt.prop_to_size(e_bet, mi=0.3, ma=5))

# could use degree to filter

deg = g.degree_property_map(deg='total', weight=g.edge_properties['weight'])
companies_list = comp_tag_table.index
# Company with the highest weighted degree / highest betweenness.
companies_list[deg.a.argmax()]
v_bet, e_bet = gt.betweenness(g, weight=g.edge_properties['weight'])
companies_list[v_bet.a.argmax()]
f_g.num_vertices()

# <codecell>

# Community structure via simulated annealing (1000 iterations, 5 spins).
v_comm = gt.community_structure(f_g, 1000, 5)
#v_comm = gt.betweenness(f_g)

# <codecell>

import numpy
# Largest connected component view; vertex sizes and edge widths derived
# from the 'cofield' edge weights.
u = gt.GraphView(f_g, vfilt=gt.label_largest_component(f_g))
deg = u.degree_property_map('total', weight = f_g.edge_properties['cofield'])
deg.fa = 2*(numpy.sqrt(deg.fa)*0.5  + 0.4)
edg = f_g.edge_properties['cofield']
edg.fa = (numpy.sqrt(edg.fa)*0.6+1)
# Edge betweenness, used as edge colour in the interactive window below.
ebet = gt.betweenness(f_g)[1]

# <codecell>


# <codecell>

pos, int = gt.interactive_window(u, pos=gt.radial_tree_layout(f_g, f_g.vertex(1)),
                         vertex_size = deg, 
                         vertex_fill_color = v_comm, 
                         vertex_text = f_g.vertex_properties['field'],
                         vertex_text_position = 0.2,
                         vertex_font_size = 9,
                         vertex_font_family = 'sans serif',
                         edge_pen_width = edg,
                         edge_color=ebet,
Exemple #31
0
# PageRank distribution histogram.
rank = GT.pagerank(g).get_array()

plt.title("Rank distribution")
plt.ylabel('#Nodes')
plt.xlabel('Rank')
plt.bar(*float_distribution(rank, 40), width=(max(rank)-min(rank))/50)
plt.savefig(f"img/rank_dist.png", format='png')
plt.close()

print(f"top {TOP} rank nodes: {get_top(rank , TOP)}")
# Free the array before the next large computation.
del rank

###############
# Betweenness #
###############
# Vertex betweenness only ([0]); the edge map is discarded.
betweenness = GT.betweenness(g)[0].get_array()

plt.title("Betweenness distribution")
plt.ylabel('#Nodes')
plt.xlabel('Betweenness coefficient')
plt.bar(*float_distribution(betweenness, 40), width=(max(betweenness)-min(betweenness))/50)
plt.savefig(f"img/betweenness_dist.png", format='png')
plt.close()

print(f"top {TOP} betweenness nodes: {get_top(betweenness, TOP)}")
del betweenness

#############
# Closeness #
#############
# Harmonic closeness computed on the largest component only.
closeness = GT.closeness(GT.extract_largest_component(g), norm=False, harmonic=True).get_array()
Exemple #32
0
import graph_tool.all as gt
from math import sqrt
import numpy as np

# Price-network demo (Python 2: raw_input below): vertex size/colour from
# in-degree, curved edges coloured by edge betweenness.
g = gt.price_network(1500)
deg = g.degree_property_map("in")
deg.a = 4 * (np.sqrt(deg.a) * 0.5 + 0.4)
ebet = gt.betweenness(g)[1]
# Rescale edge betweenness so its maximum becomes 10.
ebet.a /= ebet.a.max() / 10.
# Negated copy used as edge draw order.
eorder = ebet.copy()
eorder.a *= -1
pos = gt.sfdp_layout(g)
# Bezier control points: curvature grows with edge length.
control = g.new_edge_property("vector<double>")
for e in g.edges():
    d = sqrt(sum((pos[e.source()].a - pos[e.target()].a) ** 2)) / 5
    control[e] = [0.3, d, 0.7, d]

gt.graph_draw(g, pos=pos, vertex_size=deg, vertex_fill_color=deg, vorder=deg,
              edge_color=ebet, eorder=eorder, edge_pen_width=ebet,
              edge_control_points=control # some curvy edges
              )



# Pause so the first drawing can be inspected before redrawing.
raw_input("Press Enter to Continue")

# Redraw with a uniform grey vertex fill.
v_fill = [0.6, 0.6, 0.6, 1]
gt.graph_draw(g, pos=pos, vertex_size=deg, vertex_fill_color=v_fill, vorder=deg,
              edge_color=ebet, eorder=eorder, edge_pen_width=ebet,
              edge_control_points=control # some curvy edges
              )
Exemple #33
0
        weight_dict['weight'] = weight
        edgeCoocList.append((edge[0], edge[1], weight_dict))
    print("3")
    gCooc = nx.Graph()
    gCooc.add_edges_from(edgeCoocList)

    gCoocNode = nx.Graph()
    gCoocNode = gCooc

    gtgCooc = nx2gt_module.nx2gt(gCooc)

    CoocEdgeWeight = gtgCooc.edge_properties['weight']
    CoocVertexId = gtgCooc.vertex_properties['id']
    CoocVertexIter = gtgCooc.vertices()
    print("4")
    CoocBetween, ep = gt.betweenness(gtgCooc, weight=CoocEdgeWeight, norm=True)
    print("5")
    #ee, CoocEigen = gt.eigenvector(gtgCooc, weight=CoocEdgeWeight)
    print("6")
    #ee, CoocAuthority, CoocHub = gt.hits(gtgCooc, weight =CoocEdgeWeight)
    #CoocPagerank = gt.pagerank(gtgCooc, weight =CoocEdgeWeight)
    CoocCloseness = gt.closeness(gtgCooc, weight=CoocEdgeWeight)

    print("7")
    CoocKatz = gt.katz(gtgCooc, weight=CoocEdgeWeight)
    CoocClustering = gt.local_clustering(gtgCooc)
    print("8")
    CoocDegree = gtgCooc.degree_property_map("total", weight=CoocEdgeWeight)
    print("9")
    print("where")
    print("A")