def testFromIntAttribute(self):
    cl = VertexClustering.FromAttribute(self.graph, "int")
    self.assertTrue(cl.membership == list(range(10)))
    cl = VertexClustering.FromAttribute(self.graph, "int", 15)
    self.assertTrue(cl.membership == [0, 1, 0, 0, 2, 1, 3, 0, 4, 0])
    cl = VertexClustering.FromAttribute(self.graph, "int", [10, 20, 30])
    self.assertTrue(cl.membership == [0, 1, 2, 2, 1, 1, 3, 2, 1, 0])
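The assertions above suggest that FromAttribute puts vertices with equal attribute values into the same cluster, that a numeric third argument bins values by that bin width, and that a list gives explicit bin boundaries. A minimal, hedged sketch of that usage with a made-up graph and attribute values:

from igraph import Graph, VertexClustering

g = Graph.Ring(6)
g.vs["int"] = [2, 17, 4, 8, 31, 12]                # hypothetical attribute values
cl = VertexClustering.FromAttribute(g, "int", 15)  # bin values into intervals of width 15
print(cl.membership)                               # vertices in the same bin share a cluster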
Example #2
import sys

import pandas as pd
from igraph import Graph, VertexClustering

# compute_similarity_matrix, phase1 and phase2 are helper functions assumed to be
# defined elsewhere in this project.


def main():
    #Check input parameter alpha
    if len(sys.argv)!=2:
        print("Enter value of alpha as parameter")
        return
    alpha = float(sys.argv[1])
    # Map alpha to the suffix used in the output filename (0.5 -> 5, 0.0 -> 0, 1.0 -> 1)
    alph = 0
    if alpha == 0.5:
        alph = 5
    elif alpha == 0.0:
        alph = 0
    elif alpha == 1.0:
        alph = 1
    
    #Input Graph
    graph = Graph.Read_Edgelist('data/fb_caltech_small_edgelist.txt')
    attr = pd.read_csv('data/fb_caltech_small_attrlist.csv')

    #Initialize edge weights and copy the vertex attributes onto the graph
    graph.es['weight'] = 1
    attr_names = attr.keys()
    for idx, row in attr.iterrows():  # one row of attributes per vertex
        for name in attr_names:
            graph.vs[idx][name] = row[name]

    #Similarity Matrix
    sim_matrix = compute_similarity_matrix(graph)

    #Phase 1
    community_list = phase1(graph,alpha,sim_matrix)
    print('Communities after Phase 1:')
    print(len(set(community_list)),"Communities")
    phase1_output = ''
    for x in VertexClustering(graph, community_list):
        if x:
            phase1_output += ','.join(map(str, x))
            phase1_output += "\n"
    phase1_output = phase1_output.rstrip("\n")  # drop the trailing newline

    #Phase 2
    community_list, mapped_clusters = phase2(graph,community_list,alpha,sim_matrix)
    print(mapped_clusters)
    print('Communities after Phase 2:')
    print(len(set(community_list)),"Communities")
    phase2_output = ''
    for cluster in VertexClustering(graph, community_list):
        if cluster:
            original_vertices = []
            for vertex in cluster:
                original_vertices.extend(mapped_clusters[vertex])
            phase2_output += ','.join(map(str, original_vertices))
            phase2_output += '\n'
            print(cluster)
    phase2_output = phase2_output.rstrip('\n')  # drop the trailing newline

    with open("communities_" + str(alph) + ".txt", 'w+') as outfile:
        outfile.write(phase2_output)
    return
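A hedged usage note: if this script were saved as, say, sac.py (the filename is an assumption) next to the data/ directory, it would be run as python sac.py 0.5 and would write communities_5.txt with one comma-separated community per line.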
def main():
    parser = argparse.ArgumentParser(description='calculates various centrality-related stats (only the giant component of the graph is considered!)')
    parser.add_argument('--coauth-graph', type=argparse.FileType('r'), help='path to input pickled, gzipped graph file', required=True)
    parser.add_argument('--communities', type=argparse.FileType('r'), help='path to input .json.bz2 communities file', required=True)
    parser.add_argument('--titles', type=argparse.FileType('r'), help='path to input .json.bz2 titles file', required=True)
    parser.add_argument('--K', type=int, help='number of considered, equidistant communities: indices 0, floor(1*(N-1)/(K-1)), ..., N-1', required=True)
    parser.add_argument('--J', type=int, help='maximum number of highest-ranked nodes considered per community', required=True)
    
    args = parser.parse_args()
    input_coauth_graph_path = args.coauth_graph.name
    input_communities_path = args.communities.name
    input_titles_path = args.titles.name
    K = args.K
    J = args.J
    
    logger.info('running with:\n{}'.format(pformat({'input_coauth_graph_path':input_coauth_graph_path, 'input_communities_path':input_communities_path, 'input_titles_path':input_titles_path, 'K':K, 'J':J})))
    
    logger.info('loading graph from {}'.format(input_coauth_graph_path))
    coauth_graph = Graph.Read_Picklez(input_coauth_graph_path)
    logger.info('using the largest connected component instead of the full graph')
    coauth_graph = coauth_graph.components().giant()
    log_igraph(coauth_graph)
    
    communities = load_communities(input_communities_path)
    titles = load_titles(input_titles_path)
    
    logger.info('creating vertex clustering of community labels')
    node_labels = [communities[name] for name in coauth_graph.vs['name']]
    community_structure = VertexClustering(coauth_graph, membership=node_labels)
    logger.debug('created vertex clustering {}'.format(community_structure))
        
    community_sizes = list(enumerate(community_structure.sizes()))
    community_sizes.sort(key=lambda t:t[1], reverse=True)
    logger.debug('community sizes, sorted descending\n{}'.format(community_sizes))
        
    logger.info('filtering to communities of at least {} nodes'.format(J))
    community_sizes = [(commid,size) for commid,size in community_sizes if size >= J]
    logger.info('filtered to {} communities'.format(len(community_sizes)))
        
    N = len(community_sizes)
    logger.info('N={} communities remain; considering K={} equidistant communities'.format(N, K))
    community_indices = [math.floor(k*(N-1)/(K-1)) for k in range(0,K)]
    logger.info('considering indices {}'.format(community_indices))
    considered_communities = [community_sizes[i] for i in community_indices]
    logger.info('considering communities (id,size): {}'.format(considered_communities))
      
    find_max_nodes_per_community(community_structure, considered_communities, titles, J, degree)
    find_max_nodes_per_community(community_structure, considered_communities, titles, J, strength)
    find_max_nodes_per_community(community_structure, considered_communities, titles, J, betweenness)
    find_max_nodes_per_community(community_structure, considered_communities, titles, J, weighted_betweenness)
    find_max_nodes_per_community(community_structure, considered_communities, titles, J, closeness)
    find_max_nodes_per_community(community_structure, considered_communities, titles, J, weighted_closeness)
Example #4
def list_repr_communities(fmap: str,
                          vc: VertexClustering,
                          directed=True) -> list:
    '''
        input:  dblp json file for the paper->author mapping,
                a VertexClustering object,
                directed flag
        process:
            if directed, map each paper vertex to its authors
        output: list of author-id sets, one set per community
    '''
    com = vc.subgraphs()
    res = []
    if directed:
        # map papers to author
        papers, _ = rdp.load_data(fmap)
        for g in com:
            tmp = set()
            for v in g.vs:
                pid = int(v['name'])  # paper_id
                try:
                    for a in papers[pid]['authors']:
                        tmp.add(int(a['id']))
                except Exception:
                    pass
            res.append(tmp)
    else:
        for g in com:
            tmp = set()
            for v in g.vs:
                tmp.add(int(v['name']))  # author_id
            res.append(tmp)
    return res
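A hedged usage sketch; the input file names and the community-detection call below are assumptions, not part of the original example:

from igraph import Graph

g = Graph.Read_Picklez("citation_graph.pkl.gz")   # hypothetical paper graph with 'name' = paper id
vc = g.community_infomap()                        # any VertexClustering works here
author_sets = list_repr_communities("dblp.json", vc, directed=True)
print(len(author_sets), "communities of authors")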
Example #5
def community_spectral(G, k=2, weights=None, which='NCut_rw'):
    '''
    Performs a relaxed version of Ratio or N-cut by running k-means on
    the (n, k) matrix of eigenvectors of different versions of the graph
    Laplacian.
    @params
     G        : an igraph.Graph.
     k        : number of communities to cluster.
     weights  : a weight vector or the name of an edge attribute.
     which    : the type of cut to perform, one of RatioCut, NCut, or NCut_rw.
    @returns
     vc : VertexClustering with up to k clusters
    '''
    method = {
        'RatioCut'.lower():
        lambda g, c, w: __community_spectral_base(g, c, w, normalized=False),
        'NCut'.lower():
        lambda g, c, w: __community_spectral_base(g, c, w, normalized=True),
        'NCut_rw'.lower():
        lambda g, c, w: __community_spectral_rw(g, c, w)
    }

    # The default cut is across connected components
    vc = G.components()
    if len(vc) >= k:
        membership = [x % k for x in vc.membership]
        vc = VertexClustering(G, membership)
    else:
        vc = method[which.lower()](G, k, weights)

    return vc
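A minimal usage sketch, assuming the private helper functions referenced in the method table are defined in the same module; Zachary's karate club is used purely as an example graph:

from igraph import Graph

g = Graph.Famous("Zachary")
vc = community_spectral(g, k=3, which='NCut')
print(vc.sizes(), vc.modularity)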
Example #6
def applyCommunities(graph, membership, names, intersect=False):
    """Apply the communities passed as a parameter to this Graph."""
    # membership = parent.clusters.membership
    # names = parent.g.vs['name']

    members = [0] * len(membership)
    assigned = [0] * len(membership)
    maxM = 0

    for idx, n in enumerate(membership):
        members[graph.idgen[names[idx]]] = n
        assigned[graph.idgen[names[idx]]] = 1
        maxM = max(maxM, n)

    if intersect:
        oldMembership = graph.g.clusters().membership
        newMembership = members[:len(graph.g.vs)]

        uniquePairs = set(zip(oldMembership, newMembership))
        mapper = dict()
        for (i, pair) in enumerate(uniquePairs):
            mapper[pair] = i

        graph.membership = list(mapper[(oldMembership[i], e)]
                                for i, e in enumerate(newMembership))
    else:
        graph.membership = members[:len(graph.g.vs)]

    graph.clusters = VertexClustering(graph.g, membership=graph.membership)
Example #7
def average_recursive_network_partition(parcel_path=None,
                                        subject_path=None,
                                        matrix=None,
                                        graph_cost=.1,
                                        max_cost=.25,
                                        min_cost=0.05,
                                        min_community_size=5,
                                        min_weight=1.):
    """
	subject_path: path to subject file or files

	Combines network partitions across costs (Power et al, 2011)
	Starts at max_cost, finds partitions that nodes are in,
	slowly decreases density to find smaller partitions, but keeps 
	information (from higher densities) about nodes that become disconnected.

	Runs nodal roles on one cost (graph_cost), but with final partition.

	Returns brain_graph object.
	"""

    if matrix is None:
        subject_time_series_data = load_subject_time_series(subject_path)
        matrix = time_series_to_matrix(
            subject_time_series=subject_time_series_data,
            voxel=False,
            parcel_path=parcel_path)
        matrix = np.nanmean(matrix, axis=0)
        matrix[matrix < 0] = 0.0
        np.fill_diagonal(matrix, 0)
    matrix[matrix < 0] = 0.0
    np.fill_diagonal(matrix, 0)
    final_edge_matrix = matrix.copy()
    final_matrix = []
    cost = max_cost
    final_graph = matrix_to_igraph(matrix.copy(), cost=graph_cost)
    while True:
        temp_matrix = np.zeros((matrix.shape[0], matrix.shape[0]))
        graph = matrix_to_igraph(matrix, cost=cost)
        partition = graph.community_infomap(edge_weights='weight')
        final_matrix.append(  # collect per-cost community matrices (as in partition_avg_costs)
            community_matrix(partition.membership, min_community_size))
        if cost < min_cost:
            break
        if cost <= .05:
            cost = cost - 0.001
            continue
        if cost <= .15:
            cost = cost - 0.01
            continue
        if cost >= .3:
            cost = cost - .05
            continue
        if cost > .15:
            cost = cost - 0.01
            continue
    graph = matrix_to_igraph(
        np.nanmean(final_matrix, axis=0) * final_edge_matrix, cost=1.)
    partition = graph.community_infomap(edge_weights='weight')
    return brain_graph(
        VertexClustering(final_graph, membership=partition.membership))
def testClusterGraph(self):
    cl = VertexClustering(self.graph, [0, 0, 0, 1, 1, 1, 2, 2, 2, 2])
    self.graph.delete_edges(
        self.graph.es.select(_between=([0, 1, 2], [3, 4, 5])))
    clg = cl.cluster_graph(dict(string="concat", int=max))

    self.assertTrue(sorted(clg.get_edgelist()) == [(0, 2), (1, 2)])
    self.assertTrue(not clg.is_directed())
    self.assertTrue(clg.vs["string"] == ["aaa", "bbc", "ccab"])
    self.assertTrue(clg.vs["int"] == [41, 64, 47])

    clg = cl.cluster_graph(dict(string="concat", int=max), False)
    self.assertTrue(
        sorted(clg.get_edgelist()) == [(0, 0)] * 3 + [(0, 2)] * 12 +
        [(1, 1)] * 3 + [(1, 2)] * 12 + [(2, 2)] * 6)
    self.assertTrue(not clg.is_directed())
    self.assertTrue(clg.vs["string"] == ["aaa", "bbc", "ccab"])
    self.assertTrue(clg.vs["int"] == [41, 64, 47])
Example #9
def clean_up_membership(partition, matrix, min_community_size):
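    # Reassigns nodes that sit in communities smaller than min_community_size to the
    # large community they are most strongly connected to (by summed matrix weight),
    # then renumbers community ids so that emptied communities leave no gaps.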
    for min_community_size in range(2, min_community_size + 1):
        small_nodes = []
        small_communities = []
        membership = []
        for node in range(len(partition.membership)):
            if partition.sizes(
                    partition.membership[node])[0] < min_community_size:
                small_nodes.append(node)
                small_communities.append(partition.membership[node])
        for node in range(len(partition.membership)):
            if node not in small_nodes:
                membership.append(partition.membership[node])
                continue
            community_weights = []
            for community in range(len(partition.sizes())):
                if community not in small_communities:
                    community_weights.append(
                        np.nansum(matrix[node][np.argwhere(
                            np.array(partition.membership) == community)]))
                else:
                    community_weights.append(0.0)
            community_weights = np.array(community_weights)
            community_weights[np.isnan(community_weights)] = 0.0
            if np.nanmax(community_weights) == 0.0:
                membership.append(partition.membership[node])
                continue
            membership.append(np.argmax(community_weights))
        membership = np.array(membership)
        temp_partition = VertexClustering(partition.graph,
                                          membership=membership)
        empty = np.argwhere(np.array(temp_partition.sizes()) == 0).reshape(-1)
        diff = 0
        for e in empty:
            over = np.argwhere(membership > (e - diff)).reshape(-1)
            for m in over:
                membership[m] = membership[m] - 1
            diff = diff + 1
        partition = VertexClustering(partition.graph, membership=membership)
    return membership
def init_clusters(graph1, graph2, partition, rand_percentage=0.0):
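    # Seeds graph1's clusters from an existing partition of graph2: clusters of size > 1
    # are carried over by matching vertex "name", unseeded vertices become singletons,
    # and an optional random fraction of seeded vertices is split off into new singleton
    # clusters before the clustering is rebuilt from the "cluster_seed" attribute.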
    if rand_percentage >= 1 or len(partition) == 0:
        return VertexClustering(graph1, list(range(len(graph1.vs))))

    for idx, cluster in enumerate(partition):
        if partition.size(idx) > 1:
            vertices = graph2.vs(cluster)["name"]
            graph1.vs(name_in=vertices)["cluster_seed"] = idx

    cluster_length = len(partition)

    for vertex in graph1.vs(cluster_seed=None):
        vertex["cluster_seed"] = cluster_length
        cluster_length += 1

    if rand_percentage > 0:
        vertices = graph1.vs(cluster_seed_lt=len(partition))
        random_vertices = round((len(vertices) - 1) * rand_percentage)
        for v in sample(range(0, len(vertices)), random_vertices):
            vertices[v]["cluster_seed"] = cluster_length
            cluster_length += 1
    return VertexClustering.FromAttribute(graph1, attribute="cluster_seed")
Example #11
def partition_avg_costs(matrix, costs, min_community_size, graph_cost):
    final_edge_matrix = matrix.copy()
    final_matrix = []
    for cost in costs:
        graph = matrix_to_igraph(matrix.copy(), cost)
        partition = graph.community_infomap(edge_weights='weight')
        final_matrix.append(
            community_matrix(partition.membership, min_community_size))
    final_graph = matrix_to_igraph(np.nanmean(final_matrix, axis=0) *
                                   final_edge_matrix,
                                   cost=1.)
    partition = final_graph.community_infomap(edge_weights='weight')
    return brain_graph(
        VertexClustering(final_graph, membership=partition.membership))
Example #13
def get_ground_truth(G=None):

    if G is None:
        G = get_graph()

    class_list = G.vs['classname']
    class_dict = dict.fromkeys(class_list)

    #set the indices for lookup purposes. These will be the cluster ids
    for idx, k in enumerate(class_dict):
        class_dict[k] = idx

    membership = [class_dict[student] for student in class_list]

    return VertexClustering(G, membership)
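A hedged follow-up sketch: one natural use of this ground-truth clustering is to score a detected partition against it; the detection call below is an assumption.

from igraph import compare_communities

G = get_graph()
detected = G.community_multilevel()   # assumed detection method
truth = get_ground_truth(G)
print(compare_communities(detected.membership, truth.membership, method='nmi'))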
Example #14
def enforce_community_class(vc, nclass, nid):
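    # For every node class that is split across multiple communities, try moving the
    # whole class into each of those communities and keep the reassignment that yields
    # the highest modularity.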
    graph = vc.graph
    for (key, vals) in nclass.items():
        comm = set([vc.membership[nid[v]] for v in vals if v in nid])
        if len(comm) < 2: continue
        mod = 0
        for c in comm:
            _membership = vc.membership[:]
            for v in vals:
                _membership[nid[v]] = c
            _vc = VertexClustering(graph, membership=_membership)
            _mod = _vc.modularity
            if _mod > mod:
                vc = _vc
                mod = vc.modularity
    vc.recalculate_modularity()
    return vc
            iterator2 = iterator2 + 1  # since the similarity measure is commutative we skip certain rows
    iterator1 = iterator1 + 1

#print(simmat)
#running phase1 algorithm
community = executingPhase(graphobtained, alphavalue, simmat)
#running phase 2
print('phase 1 executed')
listofcommunity, clustermapping = executingPhase2(graphobtained, community,
                                                  alphavalue, simmat)
print('phase 2 executed')

# clustering vertices together and printing output
output = ''
for clust in VertexClustering(
        graphobtained, listofcommunity
):  #{Reference link : https://igraph.org/python/doc/igraph.clustering.VertexClustering-class.html}
    if not clust:
        continue
    else:
        orig = []
        iterator = 0
        while (iterator < len(clust)):  #runs till end of cluster
            extendedVal = clustermapping[clust[iterator]]
            orig.extend(
                extendedVal
            )  #[Reference link : https://www.programiz.com/python-programming/methods/list/extend]
            iterator = iterator + 1
        output += ','.join(
            map(str, orig)
        )  #[Reference link : https://www.geeksforgeeks.org/python-map-function/]
Example #16
def preferential_routing_multi_density(variables):
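	# Iteratively rewires the graph: each edge is scored by how its removal changes
	# modularity (Q) and/or shortest-path efficiency, the edges whose removal most
	# improves the combined score are deleted and replaced by random edges, and
	# rich-club curves (on participation coefficient and weighted degree) are
	# recomputed after each pass.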
	metric = variables[0]
	n_nodes = variables[1]
	density = variables[2]
	graph = variables[3]
	np.random.seed(variables[4])
	all_shortest = variables[5]
	print(variables[4], variables[0])
	q_ratio = variables[6]
	rccs = []
	for idx in range(150):
		delete_edges = graph.get_edgelist()
		if metric != 'none':
			vc = graph.community_fastgreedy().as_clustering()
			orig_q = vc.modularity
			membership = vc.membership
			orig_sps = np.sum(np.array(graph.shortest_paths()))
			community_matrix = brain_graphs.community_matrix(membership,0)
			np.fill_diagonal(community_matrix,1)
			orig_bc_sps = np.sum(np.array(graph.shortest_paths())[community_matrix!=1])
			q_edge_scores = []
			sps_edge_scores = []
			for edge in delete_edges:
				eid = graph.get_eid(edge[0],edge[1],error=False)
				graph.delete_edges(eid)
				q_edge_scores.append(VertexClustering(graph,membership).modularity-orig_q)
				if all_shortest == 'all':
					sps_edge_scores.append(orig_sps-np.sum(np.array(graph.shortest_paths())))
				if all_shortest == 'bc':
					sps_edge_scores.append(orig_bc_sps-np.sum(np.array(graph.shortest_paths())[community_matrix!=1]))
				graph.add_edge(edge[0],edge[1],weight=1)
			q_edge_scores = np.array(q_edge_scores)#Q when edge removed - original Q. High means increase in Q when edge removed.
			sps_edge_scores = np.array(sps_edge_scores)#original sps minus sps when edge removed. Higher value means more efficient.
			if len(np.unique(sps_edge_scores)) > 1:
				q_edge_scores = scipy.stats.zscore(scipy.stats.rankdata(q_edge_scores,method='min'))
				sps_edge_scores = scipy.stats.zscore(scipy.stats.rankdata(sps_edge_scores,method='min'))
				scores = (q_edge_scores*q_ratio) + (sps_edge_scores*(1-q_ratio))
			else:
				scores = scipy.stats.rankdata(q_edge_scores,method='min')
		if metric == 'q':
			edges = np.array(delete_edges)[np.argsort(scores)][int(-(graph.ecount()*.05)):]
			edges = np.array(list(edges)[::-1])
		if metric == 'none':
			scores = np.random.randint(0,100,(int(graph.ecount()*.05))).astype(float)
			edges = np.array(delete_edges)[np.argsort(scores)]
		for edge in edges:
			eid = graph.get_eid(edge[0],edge[1],error=False)
			graph.delete_edges(eid)
			if graph.is_connected() == False:
				graph.add_edge(edge[0],edge[1],weight=1)
				continue
			while True:
				i = np.random.randint(0,n_nodes)
				j = np.random.randint(0,n_nodes)
				if i == j:
					continue
				if graph.get_eid(i,j,error=False) == -1:
					graph.add_edge(i,j,weight=1)
					break
		sys.stdout.flush()
		vc = brain_graphs.brain_graph(graph.community_fastgreedy().as_clustering())
		pc = vc.pc
		pc[np.isnan(pc)] = 0.0
		pc_emperical_phis = RC(graph,scores=pc).phis()
		pc_average_randomized_phis = np.nanmean([RC(preserve_strength(graph,randomize_topology=True),scores=pc).phis() for i in range(25)],axis=0)
		pc_normalized_phis = pc_emperical_phis/pc_average_randomized_phis
		degree_emperical_phis = RC(graph, scores=graph.strength(weights='weight')).phis()
		average_randomized_phis = np.nanmean([RC(preserve_strength(graph,randomize_topology=True),scores=graph.strength(weights='weight')).phis() for i in range(25)],axis=0)
		degree_normalized_phis = degree_emperical_phis/average_randomized_phis
		rcc = pc_normalized_phis[-10:]
		if np.isfinite(np.nanmean(rcc)):
			rccs.append(np.nanmean(rcc))	
	return [metric,pc_normalized_phis,degree_normalized_phis,graph]
def testFromStringAttribute(self):
    cl = VertexClustering.FromAttribute(self.graph, "string")
    self.assertTrue(cl.membership == [0, 0, 0, 1, 1, 2, 2, 2, 0, 1])
Example #18
def main():
    parser = argparse.ArgumentParser(
        description=
        'calculates the most central documents of each community and writes their centrality data (titles, centralities) to a JSON file (exactly min(#nodes of community, J) titles are saved per community)'
    )
    parser.add_argument('--coauth-graph',
                        type=argparse.FileType('r'),
                        help='path to input pickled, gzipped graph file',
                        required=True)
    parser.add_argument('--communities',
                        type=argparse.FileType('r'),
                        help='path to input .json.bz2 communities file',
                        required=True)
    parser.add_argument('--titles',
                        type=argparse.FileType('r'),
                        help='path to input .json.bz2 titles file',
                        required=True)
    parser.add_argument(
        '--centrality-data',
        type=argparse.FileType('w'),
        help='path to output .json community->centrality_data file',
        required=True)
    centrality_measures = {
        'degree': degree,
        'strength': strength,
        'betweenness': betweenness,
        'closeness': closeness,
        'weighted_betweenness': weighted_betweenness,
        'weighted_closeness': weighted_closeness
    }
    parser.add_argument('--centrality-measure',
                        choices=centrality_measures,
                        help='centrality measure',
                        required=True)
    parser.add_argument(
        '--max-docs-per-comm',
        type=int,
        help='maximum number of highest-ranked nodes considered per community',
        required=True)

    args = parser.parse_args()
    input_coauth_graph_path = args.coauth_graph.name
    input_communities_path = args.communities.name
    input_titles_path = args.titles.name
    output_centrality_data_path = args.centrality_data.name
    centrality_measure = args.centrality_measure
    max_docs_per_comm = args.max_docs_per_comm

    logger.info('running with:\n{}'.format(
        pformat({
            'input_coauth_graph_path': input_coauth_graph_path,
            'input_communities_path': input_communities_path,
            'input_titles_path': input_titles_path,
            'output_centrality_data_path': output_centrality_data_path,
            'centrality_measure': centrality_measure,
            'max_docs_per_comm': max_docs_per_comm
        })))

    logger.info('loading graph from {}'.format(input_coauth_graph_path))
    coauth_graph = Graph.Read_Picklez(input_coauth_graph_path)
    log_igraph(coauth_graph)

    communities = load_communities(input_communities_path)
    titles = load_titles(input_titles_path)

    # remove nodes that do not appear in the stored community structure (e.g. because they are not in the giant community)
    logger.info('removing nodes of graph without community labels')
    node_names = coauth_graph.vs['name']
    node_names_of_communities = communities.keys()
    node_names_not_in_communities = set(node_names) - set(
        node_names_of_communities)
    coauth_graph.delete_vertices(node_names_not_in_communities)
    logger.info('graph stats after removing')
    log_igraph(coauth_graph)

    logger.info('creating vertex clustering of community labels')
    node_labels = [communities[name] for name in coauth_graph.vs['name']]
    community_structure = VertexClustering(coauth_graph,
                                           membership=node_labels)
    logger.debug('created vertex clustering {}'.format(community_structure))

    logger.info(
        'computing {}-centralities of {} documents in {} communities'.format(
            centrality_measure, community_structure.n,
            len(community_structure)))
    centrality_function = centrality_measures[centrality_measure]
    centrality_data = {}
    for comm_id in range(len(community_structure)):
        comm_subgraph = community_structure.subgraph(comm_id)
        max_node_names_centralities = get_top_nodes_of_communities(
            comm_subgraph, max_docs_per_comm, centrality_function)
        logger.debug(
            'max_node_names_weights {}'.format(max_node_names_centralities))
        max_node_names, centralities = zip(*max_node_names_centralities)
        max_doc_titles = get_document_titles_of_node_names(
            max_node_names, titles)
        logger.debug('max titles: {}'.format(max_doc_titles))
        centrality_data_of_community = {
            'size': comm_subgraph.vcount(),
            'titles': max_doc_titles,
            'centralities': centralities
        }
        centrality_data[comm_id] = centrality_data_of_community

    logger.info(
        'saving community centrality data (titles,centralities) of {} communities'
        .format(len(centrality_data)))
    save_data_to_json(centrality_data, output_centrality_data_path)
Example #19
from scipy.cluster.vq import kmeans2


def __eigenvectors_to_vc(G, eigvc):
    # run k-means on the eigenvector matrix and turn the labels into a clustering
    centroid, label = kmeans2(eigvc, eigvc.shape[1], minit='points')
    return VertexClustering(G, label)
Example #20
def recursive_network_partition(parcel_path=None,
                                subject_paths=[],
                                matrix=None,
                                graph_cost=.1,
                                max_cost=.25,
                                min_cost=0.05,
                                min_community_size=5,
                                min_weight=1.):
    """
	subject_paths: list of paths to subject file or files

	Combines network partitions across costs (Power et al, 2011)
	Starts at max_cost, finds partitions that nodes are in,
	slowly decreases density to find smaller partitions, but keeps 
	information (from higher densities) about nodes that become disconnected.

	Runs nodal roles on one cost (graph_cost), but with final partition.

	Returns brain_graph object.
	"""

    if matrix is None:
        matrix = []
        for subject_path in subject_paths:
            subject_time_series_data = load_subject_time_series(subject_path)
            matrix.append(
                time_series_to_matrix(
                    subject_time_series=subject_time_series_data,
                    voxel=False,
                    parcel_path=parcel_path))
        matrix = np.nanmean(matrix, axis=0)
        matrix[matrix < 0] = 0.0
        np.fill_diagonal(matrix, 0)
    matrix[matrix < 0] = 0.0
    np.fill_diagonal(matrix, 0)
    final_edge_matrix = matrix.copy()
    final_matrix = np.zeros(matrix.shape)
    cost = max_cost
    final_graph = matrix_to_igraph(matrix.copy(), cost=graph_cost)
    while True:
        temp_matrix = np.zeros((matrix.shape[0], matrix.shape[0]))
        graph = matrix_to_igraph(matrix, cost=cost)
        partition = graph.community_infomap(edge_weights='weight')
        connected_nodes = []
        for node in range(partition.graph.vcount()):
            if partition.graph.strength(node, weights='weight') > min_weight:
                if partition.sizes()[
                        partition.membership[node]] > min_community_size:
                    connected_nodes.append(node)
        community_edges = []
        between_community_edges = []
        for edge in combinations(connected_nodes, 2):
            if partition.membership[edge[0]] == partition.membership[edge[1]]:
                community_edges.append(edge)
            else:
                between_community_edges.append(edge)
        for edge in community_edges:
            final_matrix[edge[0], edge[1]] = 1
            final_matrix[edge[1], edge[0]] = 1
        for edge in between_community_edges:
            final_matrix[edge[0], edge[1]] = 0
            final_matrix[edge[1], edge[0]] = 0
        if cost < min_cost:
            break
        if cost <= .05:
            cost = cost - 0.001
            continue
        if cost <= .15:
            cost = cost - 0.01
            continue
        if cost >= .3:
            cost = cost - .05
            continue
        if cost > .15:
            cost = cost - 0.01
            continue
    graph = matrix_to_igraph(final_matrix * final_edge_matrix, cost=1.)
    partition = graph.community_infomap(edge_weights='weight')
    return brain_graph(
        VertexClustering(final_graph, membership=partition.membership))