def centrality_leaders(budgetYears):
    """Tag, draw and log the centrality leaders for a budget-year window.

    A copy of the graph loaded for ``budgetYears`` is reduced with
    ``ResearchCollaborationNetwork.largest_component`` and handed to
    ``cl.centrality_leaders``, which returns ``candidates`` and tiered
    ``rankings``. Members of the first ``top_k`` tiers receive a
    ``centrality_leader`` vertex attribute of ``top_k + 1 - tier`` (the
    higher the better). The graph is then drawn to
    ``<root>/figures/<first>-<last>-centrality-leaders.png`` and the leader
    names are logged in tier order.

    Args:
        budgetYears: sequence of budget years; its first and last entries
            name the output figure.
    """
    top_k = 10

    network = load_network_for(budgetYears)
    graph = ResearchCollaborationNetwork.largest_component(network.g.copy())
    candidates, rankings = cl.centrality_leaders(graph)

    leader_names = []
    for tier in range(min(len(rankings), top_k)):
        # Earlier tiers get the larger score: the higher the better.
        score = top_k + 1 - tier
        for member in rankings[tier]:
            vertex = graph.vs[candidates[member]]
            leader_names.append(vertex['name'])
            vertex['centrality_leader'] = score

    first_year, last_year = budgetYears[0], budgetYears[-1]
    figure_path = '%s/figures/%s-%s-centrality-leaders.png' % (
        root_folder(), first_year, last_year)
    draw(graph, figure_path)
    logger.info(leader_names)
def centrality_leaders(budgetYears):
    """Mark the top-tier centrality leaders, render the graph, log the names.

    Steps, in order:
      1. load the network for ``budgetYears`` and copy its graph;
      2. reduce the copy via ``ResearchCollaborationNetwork.largest_component``;
      3. obtain ``candidates`` and tiered ``rankings`` from
         ``cl.centrality_leaders``;
      4. for at most ``max_tiers`` tiers, set each member's
         ``centrality_leader`` attribute (the higher the better);
      5. draw the graph into the ``figures`` folder and log the ordered names.

    Args:
        budgetYears: sequence of budget years; ``budgetYears[0]`` and
            ``budgetYears[-1]`` appear in the figure file name.
    """
    max_tiers = 10

    net = load_network_for(budgetYears)
    component = net.g.copy()
    component = ResearchCollaborationNetwork.largest_component(component)

    candidates, rankings = cl.centrality_leaders(component)

    names_by_rank = []
    tier_indices = list(range(len(rankings)))[:max_tiers]
    for tier_no in tier_indices:
        for pos in rankings[tier_no]:
            vertex_id = candidates[pos]
            names_by_rank.append(component.vs[vertex_id]['name'])
            # set the node's centrality_leader attribute, the higher the better
            component.vs[vertex_id]['centrality_leader'] = max_tiers + 1 - tier_no

    year_from = budgetYears[0]
    year_to = budgetYears[-1]
    target = '%s/figures/%s-%s-centrality-leaders.png' % (
        root_folder(), year_from, year_to)

    draw(component, target)
    logger.info(names_by_rank)
def smallworldness(network, rep=1000):
    """Estimate small-world-ness against repeated Erdos-Renyi random graphs.

    Works on the largest component of a copy of ``network.g``: there is no
    point in considering a disconnected graph, because the average path
    length means nothing there.

    Args:
        network: object exposing the graph as ``network.g``.
        rep: number of random graphs to compare against.

    Returns:
        A ``(mean_s, ss)`` tuple: the mean of the per-iteration measures and
        the list of those measures.
    """
    graph = ResearchCollaborationNetwork.largest_component(network.g.copy())

    node_count = len(graph.vs)
    edge_count = len(graph.es)
    ordered_pairs = node_count * (node_count - 1)

    # Edge probability that gives the random graph the same expected density.
    edge_prob = float(edge_count) * 2 / ordered_pairs

    # sharp threshold: define the connectedness of a ER graph
    # http://en.wikipedia.org/wiki/Erd%C5%91s%E2%80%93R%C3%A9nyi_model
    threshold = float((np.exp(1) + 1)) * np.log(node_count) / node_count

    logger.info(
        "Small-world-ness measure: %d iterations; Erdos_Renyi: p = %f (%d/%d), n = %d, np = %f, (1 + e) * (ln n / n) = %f"
        % (rep, edge_prob, edge_count, ordered_pairs / 2, node_count,
           node_count * edge_prob, threshold))

    measures = [
        smallworldness_measure(
            graph,
            igraph.Graph.Erdos_Renyi(
                node_count, edge_prob, directed=False, loops=False))
        for _ in range(rep)
    ]
    return np.mean(measures), measures
def smallworldness(network, rep = 1000):
    """Average the small-world-ness measure over ``rep`` random graphs.

    The graph under test is a copy of ``network.g`` reduced to its largest
    component (a disconnected graph is not considered because its average
    path length means nothing). Each iteration draws an undirected,
    loop-free Erdos-Renyi graph with the same node count and matching edge
    probability and scores it with ``smallworldness_measure``.

    Args:
        network: object exposing the graph as ``network.g``.
        rep: number of iterations.

    Returns:
        ``(mean_s, ss)`` -- the mean score and the list of individual scores.
    """
    g = network.g.copy()
    g = ResearchCollaborationNetwork.largest_component(g)

    n, m = len(g.vs), len(g.es)
    p = (float(m) * 2) / (n * (n - 1))

    # sharp threshold: define the connectedness of a ER graph
    # http://en.wikipedia.org/wiki/Erd%C5%91s%E2%80%93R%C3%A9nyi_model
    c = float((np.exp(1) + 1)) * np.log(n) / n

    summary_args = (rep, p, m, (n * (n - 1)) / 2, n, n * p, c)
    logger.info("Small-world-ness measure: %d iterations; Erdos_Renyi: p = %f (%d/%d), n = %d, np = %f, (1 + e) * (ln n / n) = %f" % summary_args)

    def random_reference():
        # Fresh random graph with the same size and expected density.
        return igraph.Graph.Erdos_Renyi(n, p, directed=False, loops=False)

    ss = []
    remaining = rep
    while remaining > 0:
        ss.append(smallworldness_measure(g, random_reference()))
        remaining -= 1

    mean_s = np.mean(ss)
    return mean_s, ss
def update_graphml(budgetYears):
    """Write the network to GraphML with a ``centrality_leader`` attribute.

    Every vertex of the loaded network starts with ``centrality_leader = 0``.
    The leaders are computed by ``cl.centrality_leaders`` on a reduced copy
    (``ResearchCollaborationNetwork.largest_component``) and mapped back to
    the full network by vertex name. Members of tier ``t`` (0-based, at most
    ``top_k`` tiers) get ``top_k - t``, so the higher the better.

    Args:
        budgetYears: sequence of budget years; the first and last entries are
            formatted with ``%d`` into the output file name.
    """
    top_k = 50

    first_year, last_year = budgetYears[0], budgetYears[-1]

    network = load_network_for(budgetYears)
    # Default for every vertex; only the leaders are overwritten below.
    network.g.vs['centrality_leader'] = 0

    component = ResearchCollaborationNetwork.largest_component(
        network.g.copy())
    candidates, rankings = cl.centrality_leaders(component)

    for tier in range(min(len(rankings), top_k)):
        for member in rankings[tier]:
            leader_name = component.vs[candidates[member]]['name']
            # Look the vertex up by name in the full graph, not in the copy.
            matches = network.g.vs.select(name_eq=leader_name)
            matches['centrality_leader'] = top_k - tier

    out_path = '%s/data/networks/%d-%d.graphml' % (
        root_folder(), first_year, last_year)
    network.write(out_path)
def average_strength_for(budgetYears):
    """Log the average strength of CTSA and non-CTSA nodes.

    Args:
        budgetYears: budget years passed to ``load_network_for`` (and logged
            first).
    """
    logger.info(budgetYears)

    network = load_network_for(budgetYears)
    # pick the largest component of the network, the subgraph without any
    # isolated nodes (nodes that are not connected to any other nodes)
    graph = ResearchCollaborationNetwork.largest_component(network.g.copy())
    graph = set_category_by_is_ctsa(graph, refG)

    # (log template, category value handed to average_strength)
    groups = (
        ('ctsa: %0.2f', 1.0),
        ('non-ctsa: %0.2f', 0.0),
    )
    for template, category in groups:
        logger.info(template % average_strength(graph, category))
def average_shortest_path_for(budgetYears):
    """Log average shortest path lengths for CTSA / non-CTSA investigators.

    The graph is a copy of the loaded network reduced to its largest
    component and categorised with ``set_category_by_is_ctsa``. Path cost on
    an edge is the reciprocal of its ``weight`` attribute.

    Four figures are logged: within non-CTSA, within CTSA, non-CTSA to all,
    and CTSA to all. Category ``1.0`` is CTSA and ``0.0`` is non-CTSA (as the
    log labels state); ``target=None`` is used for the "to all" figures.

    Args:
        budgetYears: budget years passed to ``load_network_for`` (and logged
            first).
    """
    logger.info(budgetYears)

    network = load_network_for(budgetYears)
    g = network.g.copy()
    # pick the largest component of the network, the subgraph without any
    # isolated nodes (nodes that are not connected to any other nodes)
    g = ResearchCollaborationNetwork.largest_component(g)
    g = set_category_by_is_ctsa(g, refG)

    # float() forces true division: with integer weights, ``1/weight`` under
    # Python 2 truncates to 0 for every weight > 1, which would make all those
    # edges free. This also matches how network_characteristics builds its
    # reciprocal weights.
    weights = [1 / float(weight) for weight in g.es['weight']]

    # (log template, source category, target category)
    # NOTE: a CTSA -> non-CTSA figure (source=1.0, target=0.0) is not reported;
    # it was commented out in this function.
    reports = (
        ('within non-CTSA investigators: %0.3f', 0.0, 0.0),
        ('within CTSA investigators: %0.3f', 1.0, 1.0),
        ('from non-CTSA to all: %0.3f', 0.0, None),
        ('from CTSA to all: %0.3f', 1.0, None),
    )
    for template, source, target in reports:
        length = average_shortest_path(
            g, weights=weights, source=source, target=target)
        logger.info(template % length)
def update_graphml(budgetYears):
    """Store each leader's tier number in the network and write GraphML.

    All vertices are initialised with ``centrality_leader = 0``. Leaders come
    from ``cl.centrality_leaders`` run on a reduced copy of the graph
    (``ResearchCollaborationNetwork.largest_component``) and are matched back
    to the full network by vertex name. Members of tier ``t`` (0-based, at
    most ``tier_limit`` tiers) are assigned ``t + 1``; each processed tier is
    logged.

    Args:
        budgetYears: sequence of budget years; the first and last entries are
            formatted with ``%d`` into the output file name.
    """
    tier_limit = 50

    year_from = budgetYears[0]
    year_to = budgetYears[-1]

    network = load_network_for(budgetYears)
    full_graph = network.g
    # Baseline value; overwritten below for every leader.
    full_graph.vs['centrality_leader'] = 0

    reduced = full_graph.copy()
    reduced = ResearchCollaborationNetwork.largest_component(reduced)
    candidates, rankings = cl.centrality_leaders(reduced)

    tier_numbers = list(range(len(rankings)))[:tier_limit]
    for tier_no in tier_numbers:
        logger.info('tier: %d' % tier_no)
        tier_value = tier_no + 1
        for slot in rankings[tier_no]:
            name = reduced.vs[candidates[slot]]['name']
            # The attribute is written on the full graph, found by name.
            network.g.vs.select(name_eq=name)['centrality_leader'] = tier_value

    destination = '%s/data/networks/%d-%d.graphml' % (
        root_folder(), year_from, year_to)
    network.write(destination)
def network_to_d3(budgetYears):
    """Export three JSON views of the network via ``ResearchCollaborationNetwork.d3``.

    Files written under ``<root>/data/networks``:
      * ``<first>-<last>-complete.json`` -- the graph exactly as loaded;
      * ``<first>-<last>.json`` -- a copy passed through ``simplify``
        (isolated nodes removed);
      * ``<first>-<last>-largest-component.json`` -- a copy passed through
        ``largest_component``.

    Args:
        budgetYears: sequence of budget years; the first and last entries
            appear in the file names.
    """
    network = load_network_for(budgetYears)
    first_year, last_year = budgetYears[0], budgetYears[-1]

    # (transform applied to a fresh copy, or None for the untouched graph;
    #  file-name template)
    exports = (
        (None,
         '%s/data/networks/%s-%s-complete.json'),
        (ResearchCollaborationNetwork.simplify,
         '%s/data/networks/%s-%s.json'),
        (ResearchCollaborationNetwork.largest_component,
         '%s/data/networks/%s-%s-largest-component.json'),
    )
    for transform, template in exports:
        if transform is None:
            graph = network.g
        else:
            graph = transform(network.g.copy())
        path = template % (root_folder(), first_year, last_year)
        ResearchCollaborationNetwork.d3(graph, path)
def network_to_d3(budgetYears):
    """Dump the complete, simplified and largest-component graphs as JSON.

    Each view is written with ``ResearchCollaborationNetwork.d3`` into
    ``<root>/data/networks``. The file names carry the first and last budget
    year and a suffix (``-complete``, none, ``-largest-component``).

    Args:
        budgetYears: sequence of budget years used to load the network and to
            name the output files.
    """
    network = load_network_for(budgetYears)
    year_from = budgetYears[0]
    year_to = budgetYears[-1]

    # 1) the network exactly as loaded
    complete_path = '%s/data/networks/%s-%s-complete.json' % (
        root_folder(), year_from, year_to)
    ResearchCollaborationNetwork.d3(network.g, complete_path)

    # 2) remove isolated nodes
    without_isolates = ResearchCollaborationNetwork.simplify(network.g.copy())
    simplified_path = '%s/data/networks/%s-%s.json' % (
        root_folder(), year_from, year_to)
    ResearchCollaborationNetwork.d3(without_isolates, simplified_path)

    # 3) only the largest components
    biggest = ResearchCollaborationNetwork.largest_component(network.g.copy())
    biggest_path = '%s/data/networks/%s-%s-largest-component.json' % (
        root_folder(), year_from, year_to)
    ResearchCollaborationNetwork.d3(biggest, biggest_path)
def draw_g(budgetYears):
    """Draw the simplified network and its largest component as PNG figures.

    Both figures go to ``<root>/figures``; each file name contains the first
    and last budget year plus the number of vertices in the drawn graph.

    Args:
        budgetYears: sequence of budget years used to load the network and to
            name the figures.
    """
    network = load_network_for(budgetYears)

    # NOTE(review): simplify presumably drops isolated nodes and collapses
    # the graph to an undirected simple one -- confirm against
    # ResearchCollaborationNetwork.simplify.
    simplified = ResearchCollaborationNetwork.simplify(network.g.copy())

    first_year, last_year = budgetYears[0], budgetYears[-1]

    whole_path = '%s/figures/%s-%s-%d.png' % (
        root_folder(), first_year, last_year, len(simplified.vs))
    draw(simplified, whole_path)

    largest = ResearchCollaborationNetwork.largest_component(simplified)
    largest_path = '%s/figures/%s-%s-%d-largest-component.png' % (
        root_folder(), first_year, last_year, len(largest.vs))
    draw(largest, largest_path)
def network_characteristics(budgetYears):
    """Log descriptive statistics of the network for ``budgetYears``.

    Reported on the simplified network: node count, edge count, density,
    average number of new edges relative to the previous period, and number
    of isolated components. Reported on the largest component only: node and
    edge counts, three clustering coefficients, two average shortest path
    lengths and a diversity figure.

    Args:
        budgetYears: sequence of budget years. 2006 is the baseline, so no
            previous period is loaded when the first year is 2006 or earlier.
    """
    logger.info("================================================================")
    logger.info(budgetYears)

    network = load_network_for(budgetYears)
    # simplified network is the one without any isolated nodes (nodes that
    # are not connected to any other nodes)
    graph = ResearchCollaborationNetwork.simplify(network.g.copy())

    logger.info('# of nodes: %d' % (len(graph.vs)))
    logger.info('# of edges: %d' % (len(graph.es)))
    logger.info('density: %.3f' % (graph.density()))

    # 2006 is the baseline
    new_edges = 0.0
    if budgetYears[0] > 2006:
        if budgetYears[0] == 2010 and budgetYears[-1] == 2012:
            # The 2010-2012 window is compared against 2006-2009.
            previous_years = range(2006, 2010)
        else:
            # Otherwise compare against the same window shifted back a year.
            previous_years = np.array(budgetYears) - 1
        previous_network = load_network_for(previous_years)
        previous_graph = ResearchCollaborationNetwork.simplify(
            previous_network.g.copy())
        new_edges = average_number_of_new_edges(graph, previous_graph)

    logger.info('average number of new edges: %.3f' % new_edges)
    logger.info('# of isolated components: %d' % (num_of_isolated_components(graph)))

    # only the largest component, mainly because shortest path length is
    # rather arbitrary on graphs with isolated components, which our RCNs are.
    graph = ResearchCollaborationNetwork.largest_component(graph)

    collab_weights = graph.es['weight']
    reciprocal_weights = [1 / float(w) for w in collab_weights]
    unit_weights = [1] * len(collab_weights)

    logger.info('# of nodes (largest component): %d' % (len(graph.vs)))
    logger.info('# of edges (largest component): %d' % (len(graph.es)))

    clustering_unit = graph.transitivity_avglocal_undirected(
        mode='zero', weights=unit_weights)
    logger.info('C_g (weights = None): %.3f' % clustering_unit)

    clustering_weighted = graph.transitivity_avglocal_undirected(
        mode='zero', weights=collab_weights)
    logger.info('C_g (weights = number of collaborations): %.3f' % clustering_weighted)

    clustering_triplets = graph.transitivity_undirected(mode='zero')
    logger.info('C_g (triplets definition): %.3f' % clustering_triplets)

    path_unit = average_shortest_path_length_weighted(graph, unit_weights)
    logger.info("L_g (weights = 1): %.3f" % path_unit)

    path_weighted = average_shortest_path_length_weighted(
        graph, reciprocal_weights)
    logger.info("L_g (weights = 1/weights): %.3f" % path_weighted)

    diversity_weighted = diversity(graph, reciprocal_weights)
    logger.info("D_g (weights = 1/weights): %.3f" % diversity_weighted)
def network_characteristics(budgetYears):
    """Compute and log the characteristics of one budget-year network.

    First block of figures (simplified network): nodes, edges, density,
    average number of new edges versus the previous period, isolated
    components. Second block (largest component): nodes, edges, clustering
    coefficients (unit weights, collaboration weights, triplets definition),
    average shortest path lengths (unit and reciprocal weights) and diversity
    (reciprocal weights).

    Args:
        budgetYears: sequence of budget years; the previous period is only
            loaded when the first year is after the 2006 baseline.
    """

    def report(template, value):
        # Format eagerly and log, one figure per line.
        logger.info(template % value)

    logger.info(
        "================================================================")
    logger.info(budgetYears)

    network = load_network_for(budgetYears)
    g = network.g.copy()
    # simplified network is the one without any isolated nodes (nodes that
    # are not connected to any other nodes)
    g = ResearchCollaborationNetwork.simplify(g)

    report('# of nodes: %d', len(g.vs))
    report('# of edges: %d', len(g.es))
    report('density: %.3f', g.density())

    new_edges = 0.0
    # 2006 is the baseline
    starts_after_baseline = budgetYears[0] > 2006
    if starts_after_baseline:
        is_2010_to_2012 = budgetYears[0] == 2010 and budgetYears[-1] == 2012
        earlier_years = (range(2006, 2010) if is_2010_to_2012
                         else np.array(budgetYears) - 1)
        earlier = load_network_for(earlier_years)
        earlier_g = earlier.g.copy()
        earlier_g = ResearchCollaborationNetwork.simplify(earlier_g)
        new_edges = average_number_of_new_edges(g, earlier_g)

    report('average number of new edges: %.3f', new_edges)
    report('# of isolated components: %d', num_of_isolated_components(g))

    # only the largest component, mainly because shortest path length is
    # rather arbitrary on graphs with isolated components, which our RCNs are.
    g = ResearchCollaborationNetwork.largest_component(g)

    weights = g.es['weight']
    inverse = [1 / float(value) for value in g.es['weight']]
    ones = [1 for _ in g.es['weight']]

    report('# of nodes (largest component): %d', len(g.vs))
    report('# of edges (largest component): %d', len(g.es))

    report('C_g (weights = None): %.3f',
           g.transitivity_avglocal_undirected(mode='zero', weights=ones))
    report('C_g (weights = number of collaborations): %.3f',
           g.transitivity_avglocal_undirected(mode='zero', weights=weights))
    report('C_g (triplets definition): %.3f',
           g.transitivity_undirected(mode='zero'))

    report("L_g (weights = 1): %.3f",
           average_shortest_path_length_weighted(g, ones))
    report("L_g (weights = 1/weights): %.3f",
           average_shortest_path_length_weighted(g, inverse))

    report("D_g (weights = 1/weights): %.3f", diversity(g, inverse))