Ejemplo n.º 1
0
    def __init__(self, g, author, filename, entity_graph):
        """Extract a large battery of graph-theoretic features from *g*.

        Parameters
        ----------
        g : graph-tool ``Graph`` carrying a "weights" edge property and
            "pos" / "v_count" vertex properties.
        author : author label, stored as ``author_``.
        filename : source filename; any '_entgraph' substring is stripped
            before storing as ``filename_``.
        entity_graph : auxiliary entity graph, stored as ``ent_graph_``.

        Side effects: adds a "cap" edge property to *g* and finally calls
        ``g.set_directed(False)`` for the min-cut computation.
        """
        # Basics
        self.author_ = author
        self.edge_weights_ = g.edge_properties["weights"]
        self.pos_ = g.vertex_properties["pos"]
        self.v_count_ = g.vertex_properties["v_count"]
        self.gt_graph_ = g
        self.filename_ = re.sub('_entgraph', '', filename)

        # Number of edges and vertices, density
        self.num_edges_ = g.num_edges()
        # NOTE(review): duplicate of ``num_edges_`` kept for backward
        # compatibility (no trailing underscore).
        self.num_edges = g.num_edges()
        self.num_nodes_ = g.num_vertices()

        # NOTE(review): n*(n-1)/2 is the *undirected* maximum edge count,
        # while in/out degrees below treat the graph as directed -- confirm.
        self.num_poss_edges_ = (self.num_nodes_ * (self.num_nodes_ - 1)) / 2
        self.density_ = self.num_edges_ / self.num_poss_edges_
        self.density_norm = self.density_ / self.num_edges_

        # Degree averages (total / in / out) and weighted edge average.
        self.vertex_avg_, self.vertex_avg_var = gt.graph_tool.stats.vertex_average(g, "total")
        self.vertex_avg_in_, self.vertex_avg_in_var_ = gt.graph_tool.stats.vertex_average(g, "in")
        self.vertex_avg_out_, self.vertex_avg_out_var_ = gt.graph_tool.stats.vertex_average(g, "out")
        self.edge_avg, self.edge_avg_var = gt.graph_tool.stats.edge_average(g, eprop=self.edge_weights_)

        self.vertex_avg_norm = self.vertex_avg_ / self.num_edges_
        self.edge_avg_norm = self.edge_avg / self.num_edges_

        # Vertex and edge histograms
        self.vertex_hist_ = gt.graph_tool.stats.vertex_hist(g, deg='total', )
        self.vertex_hist_in_ = gt.graph_tool.stats.vertex_hist(g, deg='in', bins=range(0, self.num_nodes_))
        self.vertex_hist_out_ = gt.graph_tool.stats.vertex_hist(g, deg='out', bins=range(0, self.num_nodes_))
        self.edge_hist_ = gt.graph_tool.stats.edge_hist(g, eprop=self.edge_weights_, bins=np.arange(0.0, 1.0, 0.01))

        # First four moments (mean/var/skew/kurtosis) of each distribution.
        self.degrees_ = get_values_from_histo(self.vertex_hist_)
        self.degrees_mean, self.degrees_var, self.degrees_skew, self.degrees_kurtosis = get_moments(self.degrees_)

        self.degrees_in_ = get_values_from_histo(self.vertex_hist_in_)
        self.degrees_in_mean_, self.degrees_in_var, self.degrees_in_skew, self.degrees_in_kurtosis = get_moments(self.degrees_in_)

        self.degrees_out_ = get_values_from_histo(self.vertex_hist_out_)
        self.degrees_out_mean_, self.degrees_out_var, self.degrees_out_skew, self.degrees_out_kurtosis = get_moments(self.degrees_out_)

        self.weights_ = get_values_from_histo(self.edge_hist_)
        self.weights_mean, self.weights_var, self.weights_skew, self.weights_kurtosis = get_moments(self.weights_)

        self.degrees_mean_norm = self.degrees_mean / self.num_edges_
        self.weights_mean_norm = self.weights_mean / self.num_edges_

        # Moments of the raw weight array (``.a`` view of the property map).
        self.edge_weights_mean_, self.edge_weights_var, self.edge_weights_skew, self.edge_weights_kurtosis = get_moments(self.edge_weights_.a)
        self.edge_weights_mean_norm = self.edge_weights_mean_ / self.num_edges_

        # Distance metrics
        self.dist_histogram_ = gt.graph_tool.stats.distance_histogram(g, bins=range(0, 10))
        self.avg_shortest_path = np.mean(get_values_from_histo(self.dist_histogram_))

        self.diameter = np.max(get_values_from_histo(self.dist_histogram_))
        self.pseudo_diameter_ = gt.pseudo_diameter(g)[0]

        self.diameter_norm = self.diameter / self.num_edges_
        self.avg_shortest_path_norm = self.avg_shortest_path / self.num_edges_

        # Centrality measures (all weighted by the edge weights)
        self.max_eigen_, self.eigenvectors_ = gt.eigenvector(g, weight=self.edge_weights_)
        self.eigenvectors_ = self.eigenvectors_.a
        self.katz_ = gt.graph_tool.centrality.katz(g, weight=self.edge_weights_).a
        self.pageranks_ = gt.graph_tool.centrality.pagerank(g, weight=self.edge_weights_).a

        self.eigenvectors_mean, self.eigenvectors_var, self.eigenvectors_skew, self.eigenvectors_kurtosis = get_moments(self.eigenvectors_)
        self.katz_mean, self.katz_var, self.katz_skew, self.katz_kurtosis = get_moments(self.katz_)
        self.pageranks_mean, self.pageranks_var, self.pageranks_skew, self.pageranks_kurtosis = get_moments(self.pageranks_)

        self.eigenvectors_mean_norm = self.eigenvectors_mean / self.num_edges_
        self.katz_mean_norm = self.katz_mean / self.num_edges_
        self.pageranks_mean_norm = self.pageranks_mean / self.num_edges_

        # HITS: authority centrality, hub centrality
        self.hits_eig, self.auth_centr_, self.hub_centr_ = gt.graph_tool.centrality.hits(g, weight=self.edge_weights_)
        self.auth_centr_ = self.auth_centr_.a
        self.hub_centr_ = self.hub_centr_.a

        self.auth_centr_mean, self.auth_centr_var, self.auth_centr_skew, self.auth_centr_kurtosis = get_moments(self.auth_centr_)
        self.hub_centr_mean, self.hub_centr_var, self.hub_centr_skew, self.hub_centr_kurtosis = get_moments(self.hub_centr_)

        self.hits_eig_norm = self.hits_eig / self.num_edges_
        self.auth_centr_mean_norm = self.auth_centr_mean / self.num_edges_
        self.hub_centr_mean_norm = self.hub_centr_mean / self.num_edges_

        # Closeness and betweenness
        self.closeness_ = gt.graph_tool.centrality.closeness(g, weight=self.edge_weights_)
        self.closeness_ = self.closeness_.a

        self.vertex_betweenness_, self.edge_betweenness_ = gt.graph_tool.centrality.betweenness(g, weight=self.edge_weights_)
        self.vertex_betweenness_ = self.vertex_betweenness_.a
        self.edge_betweenness_ = self.edge_betweenness_.a

        self.closeness_mean_, self.closeness_var_, self.closeness_skew_, self.closeness_kurtosis_ = get_moments(self.closeness_)
        self.vertex_betweenness_mean, self.vertex_betweenness_var, self.vertex_betweenness_skew, self.vertex_betweenness_kurtosis = get_moments(self.vertex_betweenness_)
        self.edge_betweenness_mean, self.edge_betweenness_var, self.edge_betweenness_skew, self.edge_betweenness_kurtosis = get_moments(self.edge_betweenness_)

        self.vertex_betweenness_mean_norm = self.vertex_betweenness_mean / self.num_edges_
        self.edge_betweenness_mean_norm = self.edge_betweenness_mean / self.num_edges_

        # Reciprocity
        self.edge_reciprocity_ = gt.graph_tool.topology.edge_reciprocity(g)
        self.edge_reciprocity_norm = self.edge_reciprocity_ / self.num_edges_

        # Components: fraction and size of the largest (undirected) component.
        self.largest_component = gt.graph_tool.topology.label_largest_component(g, directed=False).a
        self.fraction_largest_component_ = np.sum(self.largest_component) / self.largest_component.shape[0]
        self.largest_component = np.sum(self.largest_component)

        self.largest_component_norm = self.largest_component / self.num_edges_

        # Booleans
        self.is_bipartite_ = gt.graph_tool.topology.is_bipartite(g)
        self.is_DAG_ = gt.graph_tool.topology.is_DAG(g)
        #self.is_planar = gt.graph_tool.topology.is_planar(g)

        # Clustering
        self.local_clustering_coefficient_ = gt.graph_tool.clustering.local_clustering(g).a
        self.global_clustering_coefficient, self.global_clustering_coefficient_var = gt.graph_tool.clustering.global_clustering(g)
        self.local_clustering_coefficient_mean, self.local_clustering_coefficient_var_, self.local_clustering_coefficient_skew, self.local_clustering_coefficient_kurtosis = get_moments(self.local_clustering_coefficient_)

        self.k_core_ = gt.graph_tool.topology.kcore_decomposition(g).a
        self.k_core_mean = np.mean(self.k_core_)
        self.k_core_mean_norm = self.k_core_mean / self.num_edges_

        self.local_clustering_coefficient_mean_norm = self.local_clustering_coefficient_mean / self.num_edges_
        self.global_clustering_coefficient_norm = self.global_clustering_coefficient / self.num_edges_

        # Assortativity
        self.assortivity, self.assortivity_var = gt.graph_tool.correlations.assortativity(g, deg="total")
        self.scalar_assortivity, self.scalar_assortivity_var = gt.graph_tool.correlations.scalar_assortativity(g, deg="total")

        self.assortivity_norm = self.assortivity / self.num_edges_
        self.scalar_assortivity_norm = self.scalar_assortivity / self.num_edges_

        ## MAX FLOW

        # Capacity defined as the inverse euclidean distance between the
        # endpoint positions, clipped at 10 to avoid blow-up for near-
        # coincident vertices.
        cap = g.new_edge_property("double")
        pos = self.pos_
        for e in g.edges():
            cap[e] = min(1.0 / norm(pos[e.target()].a - pos[e.source()].a), 10)
        g.edge_properties["cap"] = cap

        # NOTE(review): the geometric capacities above are stored on the
        # graph but the flow computation below actually uses the edge
        # weights as capacities -- confirm which is intended.
        cap = self.edge_weights_

        # Max flow from the first to the last vertex.
        src, tgt = g.vertex(0), g.vertex(self.num_nodes_ - 1)
        res = gt.graph_tool.flow.edmonds_karp_max_flow(g, src, tgt, cap)
        res.a = cap.a - res.a  # convert residual capacities to actual flow
        self.max_flow = sum(res[e] for e in tgt.in_edges())

        self.min_st_cut_partition = np.sum(gt.graph_tool.flow.min_st_cut(g, src, cap, res).a)
        self.min_st_cut_partition_norm = self.min_st_cut_partition / self.num_edges_
        self.max_flow_norm = self.max_flow / self.num_edges_

        # First-vertex features
        self.fv_degree_ = self.degrees_[0]
        self.fv_eigenvector_ = self.eigenvectors_[0]
        self.fv_katz_ = self.katz_[0]
        self.fv_pagerank_ = self.pageranks_[0]
        self.fv_auth_centr_ = self.auth_centr_[0]
        self.fv_hub_centr_ = self.hub_centr_[0]
        self.fv_closeness_ = self.closeness_[0]
        self.fv_betweenness_ = self.vertex_betweenness_[0]
        self.fv_local_clustering_coeff_ = self.local_clustering_coefficient_[0]

        # Min cut requires an undirected graph; this mutates *g* in place.
        g.set_directed(False)
        self.min_cut, self.partition = gt.graph_tool.flow.min_cut(g, weight=self.edge_weights_)
        self.partition = np.sum(self.partition.a)

        self.min_cut_norm = self.min_cut / self.num_edges_
        self.partition_norm = self.partition / self.num_edges_

        self.ent_graph_ = entity_graph
Ejemplo n.º 2
0
import graph_tool.all as gt
import matplotlib
import numpy as np  # required by np.mean / np.std below (was missing)

# Graph file to summarise.
FILE = '10000vertices.xml.gz'

print('loading ' + FILE + ' graph')
g = gt.load_graph(FILE)

# Vertex/edge counts straight from the graph -- O(1), no list materialisation.
N = g.num_vertices()
M = g.num_edges()

# Out-degree of every vertex.
arr = [v.out_degree() for v in g.vertices()]

max_degree = max(arr)
min_degree = min(arr)
avg_degree = np.mean(arr)
std_degree = np.std(arr)

print(str(N) + ' vertices')
print(str(M) + ' edges')
print('Max degree: ' + str(max_degree))
print('Min degree: ' + str(min_degree))
print('Avg degree: ' + str(avg_degree) + ' / S.D.: ' + str(std_degree))
print('Density: ' + str((2.0 * M) / (N * (N - 1.0))))
print('Pseudo-diameter: ' + str(gt.pseudo_diameter(g)[0]))
print('Global clustering: ' + str(gt.global_clustering(g)))

# gt.graph_draw(g, output_size=(5000, 5000), vertex_size=1,
#            vcmap=matplotlib.cm.gist_heat_r, output="view.png")
          gt.global_clustering(
              g_friend_LC))  # 0.10158343197387348, 0.013265280343602718

    print("\nDesciptives: Global Clustering Coefficiants - done\n")

### Deskriptives Friendship Network - Largest Component specific ###

#-- (Pseudo-) Diameter of Largest Component --#

if descDia_bool == True:

    print(
        "\n\nDeskriptives Friendship Network - Largest Component specific - (Pseudo-) Diameter\n"
    )

    dist, ends = gt.pseudo_diameter(g_friend_LC)

    print('(Pseudo-) Diameter: ', dist)  # 14.0
    #print('(Pseudo-) Diameter start:', ends[0], 'end:', ends[1])                # start: 1275 end: 37966

    print(
        "\n\nDeskriptives Friendship Network - Largest Component specific - (Pseudo-) Diameter -done\n"
    )

#-- Closeness Distribution of Largest Component --#

if descClose_bool == True:

    print("\n\n#-- Closeness Distribution --#\n")

    vprop_closeness = gt.closeness(g_friend_LC)
Ejemplo n.º 4
0
def diameter_approx(g):
    """Return an approximate diameter of *g* (graph-tool pseudo-diameter)."""
    dist, _endpoints = gt.pseudo_diameter(g)
    return dist
                 size1, 100 * size1 / g.num_vertices(),
                 size2, 100 * size2 / g.num_vertices())))

# Degree extremes and mean, rendered as "min/max/avg".
degree_summary = '{}/{}/{:.2f}'.format(int(deg.a.min()),
                                       int(deg.a.max()),
                                       float(deg.a.mean()))
data.append(('min/max/avg degree', degree_summary))

# Undirected density: 2|E| / (|V| * (|V| - 1)).
n_vertices = g.num_vertices()
density_value = 2 * g.num_edges() / n_vertices / (n_vertices - 1)
data.append(('density', '{:.7f}'.format(density_value)))

# Global clustering coefficient with its standard deviation.
data.append(('clustering coefficient (std)',
             '{:.2f} ({:.2f})'.format(*global_clustering(g))))

# Pseudo-diameter estimated as the max over 100 random source vertices.
sources = np.random.permutation(g.num_vertices())[:100]
pseudo_diam = np.max([pseudo_diameter(g, src)[0] for src in sources])
data.append(('pseudo diameter', pseudo_diam))

# Total-degree assortativity with its standard deviation.
data.append(('assortativity (std)',
             '{:.2f} ({:.2f})'.format(*assortativity(g, 'total'))))

# Render the collected (label, value) pairs as a pandas Series.
labels, values = zip(*data)
s = pd.Series(values, index=labels)
print(s.to_string())

# Persist the summary next to the input graph file.
output_path = os.path.dirname(sys.argv[1]) + '/summary.csv'
s.to_csv(output_path)