Esempio n. 1
0
def centrality_leaders(budgetYears):
    """Tag, draw and log the centrality leaders for ``budgetYears``.

    Loads the network for the given budget years, reduces a copy of its
    graph with ``ResearchCollaborationNetwork.largest_component`` and asks
    ``cl.centrality_leaders`` for the candidate vertices and their ranking
    tiers.  Vertices in the first ``top_k`` tiers receive a
    ``centrality_leader`` attribute, the graph is passed to ``draw`` with a
    PNG path under ``<root_folder()>/figures`` and the collected vertex
    names are logged in tier order.

    Args:
        budgetYears: Indexable sequence of budget years; the first and last
            entries are used in the figure file name.
    """
    network = load_network_for(budgetYears)
    graph = ResearchCollaborationNetwork.largest_component(network.g.copy())

    top_k = 10

    candidates, rankings = cl.centrality_leaders(graph)

    leader_names = []
    for tier in range(min(top_k, len(rankings))):
        for member in list(rankings[tier]):
            vertex = graph.vs[candidates[member]]
            leader_names.append(vertex['name'])
            # Earlier tiers get the larger score: the higher the better.
            vertex['centrality_leader'] = top_k + 1 - tier

    first_year = budgetYears[0]
    last_year = budgetYears[-1]

    figure_path = '%s/figures/%s-%s-centrality-leaders.png' % (
        root_folder(), first_year, last_year)
    draw(graph, figure_path)

    logger.info(leader_names)
Esempio n. 2
0
def centrality_leaders(budgetYears):
    """Score the top-ranked vertices of the network, then draw and log them.

    The graph of the network loaded for ``budgetYears`` is copied and
    reduced via ``ResearchCollaborationNetwork.largest_component``.
    ``cl.centrality_leaders`` supplies candidate vertex indices plus tiers
    of positions into that candidate list.  Only the first ``tier_limit``
    tiers are considered.

    Args:
        budgetYears: Indexable sequence of budget years; its first and last
            items name the output figure.
    """
    tier_limit = 10

    network = load_network_for(budgetYears)
    component = network.g.copy()
    component = ResearchCollaborationNetwork.largest_component(component)

    candidates, rankings = cl.centrality_leaders(component)

    names_in_rank_order = []
    considered_tiers = min(len(rankings), tier_limit)
    for tier_index in range(considered_tiers):
        # The higher the attribute value, the better the tier.
        score = tier_limit + 1 - tier_index
        for position in list(rankings[tier_index]):
            leader = component.vs[candidates[position]]
            names_in_rank_order.append(leader['name'])
            leader['centrality_leader'] = score

    startBudgetYear, endBudgetYear = budgetYears[0], budgetYears[-1]

    target = '%s/figures/%s-%s-centrality-leaders.png' % (root_folder(), startBudgetYear, endBudgetYear)
    draw(component, target)

    logger.info(names_in_rank_order)
Esempio n. 3
0
def smallworldness(network, rep=1000):
    """Compare the network against ``rep`` Erdos-Renyi random graphs.

    Args:
        network: Object whose ``g`` attribute is the graph to analyse; only
            a copy of it is used.
        rep: Number of random graphs to generate and compare against.

    Returns:
        A ``(mean_s, ss)`` tuple where ``ss`` is the list of values returned
        by ``smallworldness_measure`` (one per random graph) and ``mean_s``
        is ``np.mean(ss)``.
    """
    # Average path length means nothing on a disconnected graph, so only the
    # largest component is considered.
    graph = ResearchCollaborationNetwork.largest_component(network.g.copy())

    node_count = len(graph.vs)
    edge_count = len(graph.es)

    # Edge probability: observed edges over the number of possible pairs.
    edge_probability = float(edge_count) * 2 / (node_count * (node_count - 1))

    # Sharp threshold for the connectedness of an ER graph, see
    # http://en.wikipedia.org/wiki/Erd%C5%91s%E2%80%93R%C3%A9nyi_model
    threshold = float((np.exp(1) + 1)) * np.log(node_count) / node_count

    summary = (
        "Small-world-ness measure: %d iterations; Erdos_Renyi: p = %f (%d/%d), n  = %d, np = %f, (1 + e) * (ln n / n) = %f"
        % (rep, edge_probability, edge_count,
           (node_count * (node_count - 1)) / 2, node_count,
           node_count * edge_probability, threshold))
    logger.info(summary)

    measures = [
        smallworldness_measure(
            graph,
            igraph.Graph.Erdos_Renyi(
                node_count, edge_probability, directed=False, loops=False))
        for _ in range(rep)
    ]

    return np.mean(measures), measures
Esempio n. 4
0
def smallworldness(network, rep = 1000):
    """Average ``smallworldness_measure`` over ``rep`` random ER graphs.

    Each random graph is built by ``igraph.Graph.Erdos_Renyi`` with the same
    vertex count as the analysed graph and an edge probability derived from
    its edge count.

    Args:
        network: Object exposing the graph as ``network.g`` (it is copied,
            not modified).
        rep: How many random graphs to draw.

    Returns:
        Tuple ``(mean_s, ss)``: the ``np.mean`` of the individual measures
        and the list of those measures.
    """
    # A disconnected graph has no meaningful average path length, hence the
    # restriction to the largest component.
    component = network.g.copy()
    component = ResearchCollaborationNetwork.largest_component(component)

    n = len(component.vs)
    m = len(component.es)
    possible_edges = n * (n - 1)

    p = float(m) * 2 / possible_edges

    # Sharp connectedness threshold of an ER graph:
    # http://en.wikipedia.org/wiki/Erd%C5%91s%E2%80%93R%C3%A9nyi_model
    c = float((np.exp(1) + 1)) * np.log(n) / n

    logger.info("Small-world-ness measure: %d iterations; Erdos_Renyi: p = %f (%d/%d), n  = %d, np = %f, (1 + e) * (ln n / n) = %f"%(rep, p, m, possible_edges/2, n, n * p,  c))

    def one_trial():
        # Draw one random graph and score the component against it.
        random_graph = igraph.Graph.Erdos_Renyi(n, p, directed = False, loops = False)
        return smallworldness_measure(component, random_graph)

    ss = [one_trial() for _ in range(rep)]
    mean_s = np.mean(ss)

    return mean_s, ss
Esempio n. 5
0
def update_graphml(budgetYears):
    """Store centrality-leader scores on the network and write it as GraphML.

    Every vertex of the loaded network first gets ``centrality_leader = 0``.
    The tiers returned by ``cl.centrality_leaders`` are computed on the
    largest component of a copy of the graph.  Vertices in the first
    ``top_k`` tiers are then looked up by name in the full graph and given
    the score ``top_k - tier``.

    Args:
        budgetYears: Indexable sequence of budget years; first and last
            entries are formatted with ``%d`` into the output file name.
    """
    first_year = budgetYears[0]
    last_year = budgetYears[-1]

    network = load_network_for(budgetYears)

    # Default score for all vertices, set before the working copy is taken.
    network.g.vs['centrality_leader'] = 0
    component = ResearchCollaborationNetwork.largest_component(
        network.g.copy())

    top_k = 50

    candidates, rankings = cl.centrality_leaders(component)

    for tier in range(min(top_k, len(rankings))):
        for member in list(rankings[tier]):
            leader_name = component.vs[candidates[member]]['name']
            # Vertex indices in the component differ from the full graph's,
            # so the vertex is matched by name.
            matches = network.g.vs.select(name_eq=leader_name)
            matches['centrality_leader'] = top_k - tier

    output_path = '%s/data/networks/%d-%d.graphml' % (
        root_folder(), first_year, last_year)

    network.write(output_path)
Esempio n. 6
0
def average_strength_for(budgetYears):
    """Log ``average_strength`` for the two is-CTSA categories.

    The graph for ``budgetYears`` is copied, reduced to its largest
    component and categorised with ``set_category_by_is_ctsa`` against the
    module-level ``refG``.  ``average_strength`` is then logged for
    category ``1.0`` (labelled "ctsa") and ``0.0`` (labelled "non-ctsa").

    Args:
        budgetYears: Budget years passed on to ``load_network_for``.
    """
    logger.info(budgetYears)

    network = load_network_for(budgetYears)

    # Pick the largest component of the network, the subgraph without any
    # isolated nodes (nodes that are not connected to any other nodes).
    component = ResearchCollaborationNetwork.largest_component(
        network.g.copy())
    component = set_category_by_is_ctsa(component, refG)

    for template, category in (('ctsa: %0.2f', 1.0),
                               ('non-ctsa: %0.2f', 0.0)):
        logger.info(template % average_strength(component, category))
Esempio n. 7
0
def average_shortest_path_for(budgetYears):
    """Log average shortest path lengths for CTSA / non-CTSA investigators.

    The graph for ``budgetYears`` is copied, reduced to its largest
    component and categorised with ``set_category_by_is_ctsa`` against the
    module-level ``refG``.  Edge weights are inverted before being handed to
    ``average_shortest_path``.  Per the log labels, category ``1.0`` denotes
    CTSA investigators and ``0.0`` non-CTSA investigators; ``target=None``
    is labelled "to all".

    Args:
        budgetYears: Budget years passed on to ``load_network_for``.
    """
    logger.info(budgetYears)

    network = load_network_for(budgetYears)
    g = network.g.copy()

    # pick the largest component of the network, the subgraph without any
    # isolated nodes (nodes that are not connected to any other nodes)
    g = ResearchCollaborationNetwork.largest_component(g)

    g = set_category_by_is_ctsa(g, refG)

    # Explicit float division: a bare ``1/weight`` floors to 0 for integer
    # weights > 1 under Python 2.  This matches the ``1/float(weight)`` form
    # used by network_characteristics.
    weights = [1 / float(weight) for weight in g.es['weight']]

    logger.info('within non-CTSA investigators: %0.3f'%average_shortest_path(g, weights = weights, source = 0.0, target = 0.0))
    logger.info('within CTSA investigators: %0.3f'%average_shortest_path(g, weights = weights, source=1.0, target = 1.0))
    # Disabled measurement, kept for reference:
    #logger.info('from CTSA to non-CTSA: %0.3f'%average_shortest_path(g, weights = weights, source = 1.0, target = 0.0))
    logger.info('from non-CTSA to all: %0.3f'%average_shortest_path(g, weights = weights, source = 0.0, target = None))
    logger.info('from CTSA to all: %0.3f'%average_shortest_path(g, weights = weights, source = 1.0, target = None))
Esempio n. 8
0
def update_graphml(budgetYears):
    """Write the network as GraphML with a per-vertex leader tier number.

    All vertices start with ``centrality_leader = 0``.  Tiers come from
    ``cl.centrality_leaders`` run on the largest component of a copy of the
    graph.  Each vertex in the first ``top_k`` tiers is matched by name in
    the full graph and given its 1-based tier number (``tier + 1``), so any
    vertex that is never matched keeps the initial ``0``.

    Args:
        budgetYears: Indexable sequence of budget years; first and last
            entries are formatted with ``%d`` into the output file name.
    """
    first_year, last_year = budgetYears[0], budgetYears[-1]

    network = load_network_for(budgetYears)
    full_graph = network.g

    # Default value, assigned before the working copy is made.
    full_graph.vs['centrality_leader'] = 0
    component = ResearchCollaborationNetwork.largest_component(
        full_graph.copy())

    top_k = 50

    candidates, rankings = cl.centrality_leaders(component)

    for tier in range(min(top_k, len(rankings))):

        logger.info('tier: %d' % tier)

        for member in list(rankings[tier]):
            leader_name = component.vs[candidates[member]]['name']
            # Match by name: the component has its own vertex indexing.
            full_graph.vs.select(
                name_eq=leader_name)['centrality_leader'] = tier + 1

    network.write('%s/data/networks/%d-%d.graphml' % (
        root_folder(), first_year, last_year))
Esempio n. 9
0
def network_to_d3(budgetYears):
    """Export three JSON views of the network via ``ResearchCollaborationNetwork.d3``.

    Written under ``<root_folder()>/data/networks``:
      * ``<first>-<last>-complete.json``: the graph as loaded;
      * ``<first>-<last>.json``: a copy passed through ``simplify``;
      * ``<first>-<last>-largest-component.json``: a copy passed through
        ``largest_component``.

    Args:
        budgetYears: Indexable sequence of budget years; the first and last
            entries appear in the file names.
    """
    network = load_network_for(budgetYears)
    first_year = budgetYears[0]
    last_year = budgetYears[-1]

    def export(graph, pattern):
        # Fill in the path pattern and hand the graph to the d3 writer.
        ResearchCollaborationNetwork.d3(
            graph, pattern % (root_folder(), first_year, last_year))

    export(network.g, '%s/data/networks/%s-%s-complete.json')

    # remove isolated nodes
    export(ResearchCollaborationNetwork.simplify(network.g.copy()),
           '%s/data/networks/%s-%s.json')

    # only the largest components
    export(ResearchCollaborationNetwork.largest_component(network.g.copy()),
           '%s/data/networks/%s-%s-largest-component.json')
Esempio n. 10
0
def network_to_d3(budgetYears):
    """Dump the complete, simplified and largest-component graphs as JSON.

    Each variant is handed to ``ResearchCollaborationNetwork.d3`` together
    with a path under ``<root_folder()>/data/networks`` built from the first
    and last budget year.  The complete variant uses the loaded graph
    itself; the other two work on fresh copies.

    Args:
        budgetYears: Indexable sequence of budget years.
    """
    network = load_network_for(budgetYears)
    startBudgetYear, endBudgetYear = budgetYears[0], budgetYears[-1]

    # (transform applied to a copy of the graph, output path pattern).
    # ``None`` means: export the loaded graph unchanged.
    variants = (
        (None,
         '%s/data/networks/%s-%s-complete.json'),
        # remove isolated nodes
        (ResearchCollaborationNetwork.simplify,
         '%s/data/networks/%s-%s.json'),
        # only the largest components
        (ResearchCollaborationNetwork.largest_component,
         '%s/data/networks/%s-%s-largest-component.json'),
    )

    for transform, pattern in variants:
        if transform is None:
            graph = network.g
        else:
            graph = transform(network.g.copy())
        filename = pattern % (root_folder(), startBudgetYear, endBudgetYear)
        ResearchCollaborationNetwork.d3(graph, filename)
Esempio n. 11
0
def draw_g(budgetYears):
    """Draw the simplified network and its largest component to PNG files.

    A copy of the loaded graph is passed through
    ``ResearchCollaborationNetwork.simplify`` and drawn first.  Its largest
    component is then extracted and drawn.  Both file names, under
    ``<root_folder()>/figures``, contain the first and last budget year and
    the vertex count of the drawn graph.

    Args:
        budgetYears: Indexable sequence of budget years.
    """
    network = load_network_for(budgetYears)
    simplified = ResearchCollaborationNetwork.simplify(network.g.copy())

    first_year = budgetYears[0]
    last_year = budgetYears[-1]

    whole_figure = '%s/figures/%s-%s-%d.png'%(
        root_folder(), first_year, last_year, len(simplified.vs))
    draw(simplified, whole_figure)

    # The component is taken from the simplified graph, after it was drawn.
    largest = ResearchCollaborationNetwork.largest_component(simplified)

    component_figure = '%s/figures/%s-%s-%d-largest-component.png'%(
        root_folder(), first_year, last_year, len(largest.vs))
    draw(largest, component_figure)
Esempio n. 12
0
def network_characteristics(budgetYears):
    """Log descriptive statistics of the network for ``budgetYears``.

    Logged on the simplified graph: vertex and edge counts, density, the
    average number of new edges relative to a previous period, and the
    number of isolated components.  Logged on its largest component: vertex
    and edge counts, three clustering coefficients, two average shortest
    path lengths and a diversity value.

    Args:
        budgetYears: Indexable sequence of budget years.
    """
    logger.info("================================================================")
    logger.info(budgetYears)

    network = load_network_for(budgetYears)

    # Simplified network is the one without any isolated nodes (nodes that
    # are not connected to any other nodes).
    graph = ResearchCollaborationNetwork.simplify(network.g.copy())

    logger.info('# of nodes: %d'%(len(graph.vs)))
    logger.info('# of edges: %d'%(len(graph.es)))
    logger.info('density: %.3f'%(graph.density()))

    new_edges = 0.0

    # 2006 is the baseline: earlier periods have nothing to compare against.
    if budgetYears[0] > 2006:
        is_2010_to_2012 = budgetYears[0] == 2010 and budgetYears[-1] == 2012
        if is_2010_to_2012:
            # This period is compared against 2006-2009 as a whole.
            previous_years = range(2006,2010)
        else:
            # Otherwise compare against the same window shifted back a year.
            previous_years = np.array(budgetYears) - 1

        previous_graph = ResearchCollaborationNetwork.simplify(
            load_network_for(previous_years).g.copy())

        new_edges = average_number_of_new_edges(graph, previous_graph)
    logger.info('average number of new edges: %.3f'%new_edges)

    logger.info('# of isolated components: %d'%(num_of_isolated_components(graph)))

    # Only the largest component, mainly because shortest path length is
    # rather arbitrary on graphs with isolated components, which our RCNs are.
    component = ResearchCollaborationNetwork.largest_component(graph)
    weights = component.es['weight']
    inverse_weights = [1/float(value) for value in weights]
    unit_weights = [1] * len(weights)

    logger.info('# of nodes (largest component): %d'%(len(component.vs)))
    logger.info('# of edges (largest component): %d'%(len(component.es)))

    clustering_unit = component.transitivity_avglocal_undirected(mode='zero', weights=unit_weights)
    logger.info('C_g (weights = None): %.3f'%clustering_unit)

    clustering_weighted = component.transitivity_avglocal_undirected(mode='zero', weights=weights)
    logger.info('C_g (weights = number of collaborations): %.3f'%clustering_weighted)

    clustering_triplets = component.transitivity_undirected(mode='zero')
    logger.info('C_g (triplets definition): %.3f'%clustering_triplets)

    path_length_unit = average_shortest_path_length_weighted(component, unit_weights)
    logger.info("L_g (weights = 1): %.3f"%path_length_unit)

    path_length_inverse = average_shortest_path_length_weighted(component, inverse_weights)
    logger.info("L_g (weights = 1/weights): %.3f"%path_length_inverse)

    diversity_inverse = diversity(component, inverse_weights)
    logger.info("D_g (weights = 1/weights): %.3f"%diversity_inverse)
Esempio n. 13
0
def network_characteristics(budgetYears):
    """Log size, density, clustering, path-length and diversity figures.

    The first group of figures is taken from the simplified graph for
    ``budgetYears``, the second from that graph's largest component.  Each
    value is logged immediately after it is computed.

    Args:
        budgetYears: Indexable sequence of budget years.
    """

    def report(template, value):
        # Format one figure into its message and log it.
        logger.info(template % value)

    logger.info(
        "================================================================")
    logger.info(budgetYears)

    network = load_network_for(budgetYears)

    # simplify() yields the network without any isolated nodes (nodes that
    # are not connected to any other nodes).
    g = ResearchCollaborationNetwork.simplify(network.g.copy())

    report('# of nodes: %d', len(g.vs))
    report('# of edges: %d', len(g.es))
    report('density: %.3f', g.density())

    # 2006 is the baseline, so there are no "new" edges for it.
    new_edges = 0.0
    if budgetYears[0] > 2006:
        # 2010-2012 is compared with 2006-2009; any other period with the
        # same years shifted back by one.
        pBudgetYears = (
            range(2006, 2010)
            if budgetYears[0] == 2010 and budgetYears[-1] == 2012
            else np.array(budgetYears) - 1)

        pNetwork = load_network_for(pBudgetYears)
        pg = ResearchCollaborationNetwork.simplify(pNetwork.g.copy())

        new_edges = average_number_of_new_edges(g, pg)
    report('average number of new edges: %.3f', new_edges)

    report('# of isolated components: %d', num_of_isolated_components(g))

    # From here on only the largest component is used, mainly because
    # shortest path length is rather arbitrary on graphs with isolated
    # components, which our RCNs are.
    largest = ResearchCollaborationNetwork.largest_component(g)
    edge_weights = largest.es['weight']
    reciprocal_weights = [1 / float(w) for w in edge_weights]
    uniform_weights = [1 for _ in edge_weights]

    report('# of nodes (largest component): %d', len(largest.vs))
    report('# of edges (largest component): %d', len(largest.es))

    report('C_g (weights = None): %.3f',
           largest.transitivity_avglocal_undirected(
               mode='zero', weights=uniform_weights))
    report('C_g (weights = number of collaborations): %.3f',
           largest.transitivity_avglocal_undirected(
               mode='zero', weights=edge_weights))
    report('C_g (triplets definition): %.3f',
           largest.transitivity_undirected(mode='zero'))

    report("L_g (weights = 1): %.3f",
           average_shortest_path_length_weighted(largest, uniform_weights))
    report("L_g (weights = 1/weights): %.3f",
           average_shortest_path_length_weighted(largest, reciprocal_weights))

    report("D_g (weights = 1/weights): %.3f",
           diversity(largest, reciprocal_weights))