def plot_co_x(cox, start, end, size = (20,20), title = '', weighted=False, weight_threshold=10):

        """ Plotting function for keyword graphs

        Parameters
        --------------------
        cox: the coword networkx graph; assumes that nodes have attribute 'topic'
        start: start year
        end: end year
        """

        plt.figure(figsize=size)
        plt.title(title +' %s - %s'%(start,end), fontsize=18)
        if weighted:
            elarge=[(u,v) for (u,v,d) in cox.edges(data=True) if d['weight'] >weight_threshold]
            esmall=[(u,v) for (u,v,d) in cox.edges(data=True) if d['weight'] <=weight_threshold]
            pos=nx.graphviz_layout(cox) # positions for all nodes
            nx.draw_networkx_nodes(cox,pos,
                node_color= [s*4500 for s in nx.eigenvector_centrality(cox).values()],
                node_size = [s*6+20  for s in nx.degree(cox).values()],
                alpha=0.7)
            # edges
            nx.draw_networkx_edges(cox,pos,edgelist=elarge,
                                width=1, alpha=0.5, edge_color='black') #, edge_cmap=plt.cm.Blues
            nx.draw_networkx_edges(cox,pos,edgelist=esmall,
                                width=0.3,alpha=0.5,edge_color='yellow',style='dotted')
            # labels
            nx.draw_networkx_labels(cox,pos,font_size=10,font_family='sans-serif')
            plt.axis('off')
        else:
            nx.draw_graphviz(cox, with_labels=True,
                         alpha = 0.8, width=0.1,
                         fontsize=9,
                         node_color = [s*4 for s in nx.eigenvector_centrality(cox).values()],
                         node_size = [s*6+20 for s in nx.degree(cox).values()])
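# Usage sketch (added for illustration, not from the original source). The
# keyword graph below is hypothetical; the call assumes the older networkx 1.x
# API used above (nx.graphviz_layout / nx.draw_graphviz, which need pygraphviz
# and were moved to nx.nx_agraph or removed in networkx 2.x).
import networkx as nx
import matplotlib.pyplot as plt

cox_demo = nx.Graph()
cox_demo.add_edge('neural network', 'deep learning', weight=15)
cox_demo.add_edge('deep learning', 'computer vision', weight=8)
cox_demo.add_edge('neural network', 'computer vision', weight=12)
cox_demo.add_edge('neural network', 'speech recognition', weight=3)
plot_co_x(cox_demo, 2010, 2015, title='Co-word network', weighted=True, weight_threshold=10)
plt.show()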
	def buildGraphFromTwitterFollowing(self):
		while True:
			twitter_id=self.userq.get()
		        #print "======================================"
			twitter_id_dict=json.loads(twitter_id.AsJsonString())
			#print twitter_id_dict["name"]
		        #print i.AsJsonString()
		        #pprint.pprint(i.GetCreatedAt())
		        #pprint.pprint(i.GetGeo())
		        #pprint.pprint(i.GetLocation())
		        #pprint.pprint(i.GetText())
			for f in self.api.GetFollowers(twitter_id):
				try:
					follower_id_dict=json.loads(f.AsJsonString())
					#print follower_id_dict["name"]
					self.tng.add_edge(twitter_id_dict["name"],follower_id_dict["name"])
					self.userq.put(f)	
					self.no_of_vertices+=1
				except:
					pass
			if self.no_of_vertices > 50:
				break
			print "======================================"
		nx.shell_layout(self.tng)
		nx.draw_networkx(self.tng)
		print "==========================================================================================="
		print "Bonacich Power Centrality of the Social Network (Twitter) Crawled - computed using PageRank"
		print "(a degree centrality based on social prestige)"
		print "==========================================================================================="
		print sorted(nx.pagerank(self.tng).items(),key=operator.itemgetter(1),reverse=True)
		print "==========================================================================================="
		print "Eigen Vector Centrality"
		print "==========================================================================================="
		print nx.eigenvector_centrality(self.tng)
		plt.show()
def eigenvector_component(seed_num, graph_json_filename=None, graph_json_str=None):
  if graph_json_filename is None and graph_json_str is None:
    return []

  G = None
  if graph_json_str is None:
    G = util.load_graph(graph_json_filename=graph_json_filename)
  else:
    G = util.load_graph(graph_json_str=graph_json_str)

  components = list(nx.connected_components(G))
  components = filter(lambda x: len(x) > 0.1 * len(G), components)
  total_size = sum(map(lambda x: len(x), components))
  total_nodes = 0
  rtn = []
  for comp in components[1:]:
    num_nodes = int(float(len(comp)) / total_size * seed_num)
    component = G.subgraph(list(comp))
    clse_cent = nx.eigenvector_centrality(component)
    collector = collections.Counter(clse_cent)
    clse_cent = collector.most_common(num_nodes)
    rtn += map(lambda (x, y): x, clse_cent)
    total_nodes += num_nodes

  num_nodes = seed_num - total_nodes
  component = G.subgraph(list(components[0]))
  clse_cent = nx.eigenvector_centrality(component)
  collector = collections.Counter(clse_cent)
  clse_cent = collector.most_common(num_nodes)
  rtn += map(lambda (x, y): x, clse_cent)
  return rtn
def eigenvector(g, recalculate=False):
    """
    Performs robustness analysis based on eigenvector centrality,  
    on the network specified by infile using sequential (recalculate = True) 
    or simultaneous (recalculate = False) approach. Returns a list 
    with fraction of nodes removed, a list with the corresponding sizes of 
    the largest component of the network, and the overall vulnerability 
    of the network.
    """

    m = networkx.eigenvector_centrality(g, max_iter=5000)
    l = sorted(m.items(), key=operator.itemgetter(1), reverse=True)
    x = []
    y = []
    largest_component = max(networkx.connected_components(g), key=len)
    n = len(g.nodes())
    x.append(0)
    y.append(len(largest_component) * 1. / n)
    r = 0.0
    for i in range(1, n - 1):
        g.remove_node(l.pop(0)[0])
        if recalculate:

            try:
                m = networkx.eigenvector_centrality(g, max_iter=5000)
            except networkx.NetworkXError:
                break

            l = sorted(m.items(), key=operator.itemgetter(1),
                       reverse=True)
        largest_component = max(networkx.connected_components(g), key=len)
        x.append(i * 1. / n)
        r += len(largest_component) * 1. / n
        y.append(len(largest_component) * 1. / n)
    return x, y, r / n
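# Usage sketch (added for illustration): run the simultaneous attack on a
# synthetic scale-free graph. `networkx` and `operator` are assumed to be
# imported as in the function above; the function mutates the graph, so a
# copy is passed in.
import networkx
g_demo = networkx.barabasi_albert_graph(200, 3, seed=42)
fractions, component_sizes, vulnerability = eigenvector(g_demo.copy(), recalculate=False)
print(vulnerability)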
def sna_calculations(g, play_file):
    """
    :param g: a NetworkX graph object
    :type g: object
    :param play_file: the location of a play in .txt format
    :type play_file: string
    :return: returns a dictionary containing various network related figures
    :rtype: dict
    :note: also writes into results/file_name-snaCalculations.csv and results/allCharacters.csv
    """
    file_name = os.path.splitext(os.path.basename(play_file))[0]
    sna_calculations_list = dict()
    sna_calculations_list['playType'] = file_name[0]
    sna_calculations_list['avDegreeCentrality'] = numpy.mean(numpy.fromiter(iter(nx.degree_centrality(g).values()),
                                                                            dtype=float))
    sna_calculations_list['avDegreeCentralityStd'] = numpy.std(
        numpy.fromiter(iter(nx.degree_centrality(g).values()), dtype=float))
    sna_calculations_list['avInDegreeCentrality'] = numpy.mean(
        numpy.fromiter(iter(nx.in_degree_centrality(g).values()), dtype=float))
    sna_calculations_list['avOutDegreeCentrality'] = numpy.mean(
        numpy.fromiter(iter(nx.out_degree_centrality(g).values()), dtype=float))

    try:
        sna_calculations_list['avShortestPathLength'] = nx.average_shortest_path_length(g)
    except:
        sna_calculations_list['avShortestPathLength'] = 'not connected'

    sna_calculations_list['density'] = nx.density(g)
    sna_calculations_list['avEigenvectorCentrality'] = numpy.mean(
        numpy.fromiter(iter(nx.eigenvector_centrality(g).values()), dtype=float))
    sna_calculations_list['avBetweennessCentrality'] = numpy.mean(
        numpy.fromiter(iter(nx.betweenness_centrality(g).values()), dtype=float))
    sna_calculations_list['DegreeCentrality'] = nx.degree_centrality(g)
    sna_calculations_list['EigenvectorCentrality'] = nx.eigenvector_centrality(g)
    sna_calculations_list['BetweennessCentrality'] = nx.betweenness_centrality(g)

    # sna_calculations.txt file
    sna_calc_file = csv.writer(open('results/' + file_name + '-snaCalculations.csv', 'wb'), quoting=csv.QUOTE_ALL,
                               delimiter=';')
    for key, value in sna_calculations_list.items():
        sna_calc_file.writerow([key, value])

    # all_characters.csv file
    if not os.path.isfile('results/allCharacters.csv'):
        with open('results/allCharacters.csv', 'w') as f:
            f.write(
                'Name;PlayType;play_file;DegreeCentrality;EigenvectorCentrality;BetweennessCentrality;speech_amount;AverageUtteranceLength\n')

    all_characters = open('results/allCharacters.csv', 'a')
    character_speech_amount = speech_amount(play_file)
    for character in sna_calculations_list['DegreeCentrality']:
        all_characters.write(character + ';' + str(sna_calculations_list['playType']) + ';' + file_name + ';' + str(
            sna_calculations_list['DegreeCentrality'][character]) + ';' + str(
            sna_calculations_list['EigenvectorCentrality'][character]) + ';' + str(
            sna_calculations_list['BetweennessCentrality'][character]) + ';' + str(
            character_speech_amount[0][character]) + ';' + str(character_speech_amount[1][character]) + '\n')
    all_characters.close()

    return sna_calculations_list
	def eigenvector_centrality(self, iterations, withme=False, node=None, average=False):
		my_dict = nx.eigenvector_centrality(self.mynet,
			max_iter = iterations)

		if node==None:
			if withme:
				my_dict =nx.eigenvector_centrality(self.mynet,
					max_iter = iterations)
				new = {}
				new2={}
				for i in my_dict:
					new[self.id_to_name(i)] = my_dict[i]
					new2[i] = my_dict[i]
				if average:
					print "The average is " + str(round(sum(new.values())/float(len(new.values())),4))
				else:
					for i,j in new.items():
						print i, round(j,4)
					return new2
			else:

				my_dict = nx.eigenvector_centrality(self.no_ego_net,
					max_iter = iterations)

				new = {}
				new2={}
				for i in my_dict:
					new[self.id_to_name(i)] = my_dict[i]
					new2[i] = my_dict[i]
				if average:
					print "The average is " + str(round(sum(new.values())/float(len(new.values())),4))
				else:
					for i,j in new.items():
						print i, round(j,4)
					return new2


		else:
			if withme:
				my_dict = nx.eigenvector_centrality(self.mynet,max_iter = iterations)
				try:
					print "The coefficient for node "+str(node)+ "is "+ str(round(my_dict[node],4))
				except:
					try:
						return my_dict[self.name_to_id(node)]
					except:
						print "Invalid node name"
			else:
				my_dict = nx.eigenvector_centrality(self.no_ego_net,max_iter = iterations)
				try:
					print "The coefficient for node "+str(node)+ "is "+ str(round(my_dict[node],4))
				except:
					try:
						print "The coefficient for node "+str(node)+ "is "+ str(round(my_dict[[self.name_to_id(node)]],4))
					except:
						print "Invalid node name"
    def centrality_measures(self):

        centrality_measures = []
        txt = ''
        
        # betweenness
        # unweighted
        self.unweighted_betweenness_distribution	= nx.betweenness_centrality(self.G)
        statistics		= self.Stats.get_distribution_info(self.unweighted_betweenness_distribution)
        centrality_measures.extend(statistics[:5])
        centrality_measures.extend(statistics[5])
        txt += ',average betweenness centrality (unweighted)' + self.standard_text_distribution

        # # weighted
        self.weighted_betweenness_distribution		= nx.betweenness_centrality(self.G, weight = self.weight_id)
        # statistics		= self.Stats.get_distribution_info(self.weighted_betweenness_distribution)
        # centrality_measures.extend(statistics[:5])
        # centrality_measures.extend(statistics[5])
        # txt += ',average betweenness centrality (weighted)' + self.standard_text_distribution
        
        # closeness
        # unweighted
        self.unweighted_closeness_distribution	= nx.closeness_centrality(self.G)
        statistics		= self.Stats.get_distribution_info(self.unweighted_closeness_distribution)
        centrality_measures.extend(statistics[:5])
        centrality_measures.extend(statistics[5])
        txt += ',average closeness centrality (unweighted)' + self.standard_text_distribution        
        
        # eigenvector
        # right
        try:
            self.right_eigenvector_distribution	= nx.eigenvector_centrality(self.G)
            statistics	= self.Stats.get_distribution_info(self.right_eigenvector_distribution)
            centrality_measures.extend(statistics[:5])
            centrality_measures.extend(statistics[5])
        except:
            centrality_measures.extend([0,0,0,0,0])
            centrality_measures.extend([0]*len(statistics[5])) 
        txt += ',average right eigenvector' + self.standard_text_distribution

        # left
        try:
            G_rev 								= self.G.reverse()
            self.lef_eigenvector_distribution	= nx.eigenvector_centrality(G_rev)
            statistics							= self.Stats.get_distribution_info(self.lef_eigenvector_distribution)
            centrality_measures.extend(statistics[:5])
            centrality_measures.extend(statistics[5])
        except:
            centrality_measures.extend([0,0,0,0,0])
            centrality_measures.extend([0]*len(statistics[5])) 
        txt += ',average left eigenvector' + self.standard_text_distribution

        return [centrality_measures, txt]
def eigenvector_apl(g, recalculate=False):
    """
    Performs robustness analysis based on eigenvector centrality,
    on the network specified by infile using sequential (recalculate = True)
    or simultaneous (recalculate = False) approach. Returns a list
    with fraction of nodes removed, a list with the corresponding sizes of
    the largest component of the network, and the overall vulnerability
    of the network.
    """

    m = networkx.eigenvector_centrality(g)
    l = sorted(m.items(), key=operator.itemgetter(1), reverse=True)
    x = []
    y = []

    average_path_length = 0.0
    number_of_components = 0
    n = len(g.nodes())

    for sg in networkx.connected_component_subgraphs(g):
        average_path_length += networkx.average_shortest_path_length(sg)
        number_of_components += 1

    average_path_length /= number_of_components
    initial_apl = average_path_length

    r = 0.0
    for i in range(1, n - 1):
        g.remove_node(l.pop(0)[0])
        if recalculate:

            try:
                m = networkx.eigenvector_centrality(g, max_iter=5000)
            except networkx.NetworkXError:
                break

            l = sorted(m.items(), key=operator.itemgetter(1),
                       reverse=True)
        average_path_length = 0.0
        number_of_components = 0

        for sg in networkx.connected_component_subgraphs(g):
            if len(sg.nodes()) > 1:
                average_path_length += networkx.average_shortest_path_length(sg)
            number_of_components += 1

        average_path_length = average_path_length / number_of_components

        x.append(i * 1. / n)
        r += average_path_length * 1. / initial_apl
        y.append(average_path_length * 1. / initial_apl)
    return x, y, r / initial_apl
def eigValue(charList, graphFile, bookNetworksPath):
    # Compute eigenvector centrality for all characters in the current chapter graph.
    g = nx.read_gexf(graphFile)
    eigCentrality = nx.eigenvector_centrality(g, max_iter=100, tol=1.0e-6, nstart=None, weight="Weight")
    eigValues = eigCentrality.values()

    # NORMALISE eigenvector values
    d = decimal.Decimal
    maxEig = max(eigValues)
    minEig = min(eigValues)
    maxMinusMin = d(maxEig) - d(minEig)

    if not charList:
        # Get top 10 overall characters from overall.gexf graph
        overallGraphFile = bookNetworksPath + "overall.gexf"
        overall_g = nx.read_gexf(overallGraphFile)
        overallEigCent = nx.eigenvector_centrality(overall_g, max_iter=100, tol=1.0e-6, nstart=None, weight="Weight")

        # sortedCentrality = dict(sorted(overallEigCent.iteritems(), key=itemgetter(1), reverse=True)[:10])
        sortedCentrality = sorted(overallEigCent.iteritems(), key=itemgetter(1), reverse=True)

        charList = [seq[0] for seq in sortedCentrality]
        return charList

    else:
        charList = [item for item in charList]

        for index, item in enumerate(charList):
            currentChar = None
            for key, value in eigCentrality.iteritems():
                if key == item:
                    # Unnormalised version...
                    charList[index] = (key, str(value))
                    currentChar = key
                # if key == item:
                #     nummerator = d(value)-d(minEig)
                #     if nummerator==0:
                #         charList[index] = (key, str(0))
                #     else:
                #         norm_value = (d(value)-d(minEig))/d(maxMinusMin)
                #         charList[index] = (key, str(norm_value))
                #     currentChar = key
            # If current character is not present in the current chapter assign 0 influence.
            if not currentChar:
                charList[index] = (item, 0)

        return charList
def relevant_stats(G):
	cloC = nx.closeness_centrality(G, distance = 'distance')
	betC = nx.betweenness_centrality(G, weight = 'distance')
	katC = nx.katz_centrality(G)
	eigC = nx.eigenvector_centrality(G)

	return cloC, betC, katC, eigC
def set_capacities_eigenvector_gravity(topology, capacities,
                                       capacity_unit='Mbps', max_iter=1000):
    """
    Set link capacities proportionally to the product of the eigenvector
    centralities of the two end-points of the link

    Parameters
    ----------
    topology : Topology
        The topology to which link capacities will be set
    capacities : list
        A list of all possible capacity values
    capacity_unit : str, optional
        The unit in which capacity value is expressed (e.g. Mbps, Gbps etc..)
    max_iter : int, optional
        The maximum number of iterations of the algorithm allowed. If a
        solution is not found within this number of iterations, a RuntimeError
        is raised.

    Raises
    ------
    RuntimeError : if the algorithm does not converge in max_iter iterations
    """
    try:
        centrality = nx.eigenvector_centrality(topology, max_iter=max_iter)
    except nx.NetworkXError:
        raise RuntimeError('Algorithm did not converge in %d iterations'
                           % max_iter)
    _set_capacities_gravity(topology, capacities, centrality, capacity_unit)
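# Sketch (added): _set_capacities_gravity is not shown in this snippet. Under
# the behaviour described in the docstring ("capacities proportional to the
# product of the end-point centralities"), a hypothetical helper could look
# roughly like this; the name and the linear binning rule are assumptions.
def _set_capacities_gravity_sketch(topology, capacities, centrality, capacity_unit='Mbps'):
    capacities = sorted(capacities)
    # gravity score of a link = product of its end-point centralities
    gravity = {(u, v): centrality[u] * centrality[v] for u, v in topology.edges()}
    lo, hi = min(gravity.values()), max(gravity.values())
    for (u, v), score in gravity.items():
        # map the score linearly onto the available capacity values
        idx = 0 if hi == lo else int((score - lo) / (hi - lo) * (len(capacities) - 1))
        topology[u][v]['capacity'] = capacities[idx]
    topology.graph['capacity_unit'] = capacity_unit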
# File: sna.py  Project: dgawlik/ed
    def all_users_popular_nodes(self):

        Gall = self._graph_from_cursor('graph3')

        slots = []
        for i in range(1,7):
            G = self._graph_from_cursor('all_posts_s%d' % i)
            slots.append(G)

        degree = nx.degree_centrality(G).items()
        eigen = nx.eigenvector_centrality(G).items()
        betweeness = nx.betweenness_centrality(G, k=20).items()

        topDegree = sorted(degree, key=lambda (n,x): x, reverse=True)[:10]
        topEigen = sorted(eigen, key=lambda (n,x): x, reverse=True)[:10]
        topBetweeness = sorted(betweeness, key=lambda (n,x): x, reverse=True)[:10]

        topDegreeIds = map(lambda (n,x): n, topDegree)
        topEigenIds = map(lambda (n,x): n, topEigen)
        topBetweenessIds = map(lambda (n,x): n, topBetweeness)

        inter = list(set(topDegreeIds).intersection(topEigenIds).intersection(topBetweenessIds))
        union = list(set(topDegreeIds).union(topEigenIds).union(topBetweenessIds))

        out = StringIO.StringIO()
        writer = csv.writer(out, delimiter='|', quoting=csv.QUOTE_NONE)
def eigenvector_neighbors(seed_num, graph=None, graph_json_filename=None, graph_json_str=None):
  if graph_json_filename is None and graph_json_str is None and graph is None:
    return []

  G = None
  if graph is not None:
    G = graph
  elif graph_json_str is None:
    G = util.load_graph(graph_json_filename=graph_json_filename)
  else:
    G = util.load_graph(graph_json_str=graph_json_str)

  clse_cent = nx.get_node_attributes(G, "centrality")
  if len(clse_cent) == 0:
    clse_cent = nx.eigenvector_centrality(G)
    nx.set_node_attributes(G, "centrality", clse_cent)
    print "hi eigen-vector neighbors"
  collector = collections.Counter(clse_cent)
  clse_cent = collector.most_common(SURROUND_TOP)
  nodes = map(lambda (x, y): x, clse_cent)

  current_seed = 0
  rtn = []
  while current_seed < seed_num:
    current_node = nodes[current_seed % len(nodes)]
    current_neighbors = G.neighbors(current_node)
    rtn += random.sample(set(current_neighbors) - set(rtn) - set(nodes), 1)
    current_seed += 1

  return rtn
 def centralities(self):
     '''
     Get info on centralities of data
     Params:
         None
     Returns:
         dictionary of centrality metrics with keys(centralities supported):
             degree - degree centrality
             betweeness - betweenness centrality
             eigenvector - eigenvector centrality
             hub - hub scores - not implemented
             authority - authority scores - not implemented
             katz - Katz centrality
             pagerank - PageRank centrality - not implemented
     '''
     output = {}
     output['degree'] = nx.degree_centrality(self.G)
     output['betweeness'] = nx.betweenness_centrality(self.G)
     try:
         output['eigenvector'] = nx.eigenvector_centrality(self.G)
         output['katz'] = nx.katz_centrality(self.G)
     except:
         output['eigenvector'] = 'empty or exception'
         output['katz'] = 'empty or exception'
     # output['hub'] = 'Not implemented'
     # output['authority'] = 'Not implemented'
     # output['pagerank'] = 'Not implemented'
     return output
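# Usage sketch (added; hypothetical object name): assuming the surrounding
# class stores its graph in self.G, the method returns per-node dicts, e.g.
#
#   analyzer.G = nx.karate_club_graph()
#   cents = analyzer.centralities()
#   cents['degree'][0]       # degree centrality of node 0
#   cents['eigenvector'][0]  # eigenvector centrality of node 0, or the
#                            # 'empty or exception' fallback string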
    def main(self):
        G = nx.Graph()
        retweet = self.gettweet(TWEETID)
        retweet['jyunichidesita'] = (datetime.datetime(2014, 4, 28, 0, 0, 0),) # special handling only for the user who posted the flame tweet
        userlist = retweet.keys()
        # node = self.getRTchannel('jyunichidesita', userlist, retweet) # list of user names who retweeted the flame tweet and have a link to that tweet's user
        # fetch the first LIMIT users who retweeted the flame tweet
        node = self.getnode(TWEETID, LIMIT)
        while node:
            next = []
            for n in node:
                tmp = self.getRTchannel(n, userlist, retweet)
                next.extend(tmp)
                edges = [(userlist.index(n), userlist.index(x)) for x in tmp]
                G.add_edges_from(edges)
            else:
                node = next

        degree = nx.degree(G)
        close = nx.closeness_centrality(G)
        bet = nx.betweenness_centrality(G)
        eigen = nx.eigenvector_centrality(G)
        f = open('../../data/output/diffusion/centrality_all.csv', 'w')
        for k,v in sorted(degree.items(), key=lambda x: x[1], reverse=True):
            f.write(str(userlist[k]) + ',' + str(retweet[userlist[k]][0]) + ',' + str(retweet[userlist[k]][3]) + ',' + str(v) + ',' + str(close[k]) + ',' + str(bet[k]) + ',' + str(eigen[k]) + '\n')
        f.close()

        nx.draw(G, node_size=50)
        plt.savefig("../../data/output/diffusion/undirected_all.png")
        plt.show()
def distinct(g):
    bc = nx.betweenness_centrality(g)
    cc = nx.closeness_centrality(g)
    dc = nx.degree_centrality(g)
    ec = nx.eigenvector_centrality(g)

    return [maximum(bc), maximum(cc), maximum(dc), maximum(ec)]
def describe(G, ny_tri, chems):
	'''
	Describe the network: degrees, clustering, and centrality measures
	'''
	global describeNetwork
	# Degree
	# The number of connections a node has to other nodes.
	degrees= nx.degree(G)
	degrees_df = pd.DataFrame(degrees.items(), columns=['Facility', 'Degrees'])
	values = sorted(set(degrees.values())) 
	hist = [degrees.values().count(x) for x in values]
	plt.figure()
	plt.plot(values, hist,'ro-') # degree
	plt.xlabel('Degree')
	plt.ylabel('Number of nodes')
	plt.title('Degree Distribution')
	plt.savefig('output/degree_distribution.png')

	# Clustering coefficients
	# The bipartie clustering coefficient is a measure of local density of connections.
	clust_coefficients = nx.clustering(G)
	clust_coefficients_df = pd.DataFrame(clust_coefficients.items(), columns=['Facility', 'Clustering Coefficient'])
	clust_coefficients_df = clust_coefficients_df.sort('Clustering Coefficient', ascending=False)
	#print clust_coefficients_df

	# Node centrality measures
	FCG=list(nx.connected_component_subgraphs(G, copy=True))[0]
	# Current flow betweenness centrality
	# Current-flow betweenness centrality uses an electrical current model for information spreading 
	# in contrast to betweenness centrality which uses shortest paths.
	betweeness = nx.current_flow_betweenness_centrality(FCG)
	betweeness_df = pd.DataFrame(betweeness.items(), columns=['Facility', 'Betweeness'])
	betweeness_df = betweeness_df.sort('Betweeness', ascending=False)
	# Closeness centrality
	# The closeness of a node is the distance to all other nodes in the graph 
	# or in the case that the graph is not connected to all other nodes in the connected component containing that node.
	closeness = nx.closeness_centrality(FCG)
	closeness_df = pd.DataFrame(closeness.items(), columns=['Facility', 'Closeness'])
	closeness_df = closeness_df.sort('Closeness', ascending=False)
	# Eigenvector centrality
	# Eigenvector centrality computes the centrality for a node based on the centrality of its neighbors.
	# In other words, how connected a node is to other highly connected nodes.
	eigenvector = nx.eigenvector_centrality(FCG)
	eigenvector_df = pd.DataFrame(eigenvector.items(), columns=['Facility', 'Eigenvector'])
	eigenvector_df = eigenvector_df.sort('Eigenvector', ascending=False)

	# Create dataframe of facility info
	fac_info = ny_tri[['tri_facility_id','facility_name', 'primary_naics', 'parent_company_name']].drop_duplicates()
	fac_info.rename(columns={'facility_name':'Facility'}, inplace=True)

	# Merge everything
	describeNetwork = degrees_df.merge(
		clust_coefficients_df,on='Facility').merge(
		betweeness_df,on='Facility').merge(
		closeness_df, on='Facility').merge(
		eigenvector_df, on='Facility').merge(
		fac_info, on='Facility', how='left').merge(
		chems, on='Facility', how='left')
	describeNetwork = describeNetwork.sort('Degrees', ascending=False)
	describeNetwork.to_csv('output/describeNetwork.csv')
	def create_authors(corpus):
		"""
		Generate the authors_profile.tsv file
		To perform just ONE time
		:type corpus: pandas.DataFrame
		:return:
		"""

		tpc = TopicsClassifier(pd_corpus=corpus)
		pp = PredictionProfile(pd_corpus=corpus)

		for index, tweet in corpus.iterrows():
			u = User(tweet.User_Name)
			u.load()
			u.update_profile(tweet.Vector, predict=False)
			u.save()

		graph = User.load_graph()
		centralities = nx.eigenvector_centrality(graph)
		for author in User.get_all_authors():
			author.centrality = centralities[author.id] if author.id in centralities else 0.
			author.set_prediction_profile(pp)
			author.set_topic_classifier(tpc)
			author.predict_profile()
			author.save()
		return
 def most_central(self,F=1,cent_type='betweenness'):
     if cent_type == 'betweenness':
         ranking = nx.betweenness_centrality(self.G).items()
     elif cent_type == 'closeness':
         ranking = nx.closeness_centrality(self.G).items()
     elif cent_type == 'eigenvector':
         ranking = nx.eigenvector_centrality(self.G).items()
     elif cent_type == 'harmonic':
         ranking = nx.harmonic_centrality(self.G).items()
     elif cent_type == 'katz':
         ranking = nx.katz_centrality(self.G).items()
     elif cent_type == 'load':
         ranking = nx.load_centrality(self.G).items()
     elif cent_type == 'degree':
         ranking = nx.degree_centrality(self.G).items()
     ranks = [r for n,r in ranking]
     cent_dict = dict([(self.lab[n],r) for n,r in ranking])
     m_centrality = sum(ranks)
     if len(ranks) > 0:
         m_centrality = m_centrality/len(ranks)
     #Create a graph with the nodes above the cutoff centrality- remove the low centrality nodes
     thresh = F*m_centrality
     lab = {}
     for k in self.lab:
         lab[k] = self.lab[k]
     g = Graph(self.adj.copy(),self.char_list)
     for n,r in ranking:
         if r < thresh:
             g.G.remove_node(n)
             del g.lab[n]
     return (cent_dict,thresh,g)
def main():
    # n = get_node_list('Output.txt')
    # save_mapper_file(n, 'Mapper.txt')
    # anonymize_names("Output.txt", 'AnOutput.txt')
    an = get_node_list('AnOutput.txt')

    G = create_graph('AnOutput.txt', True)
    #in_deg_res, out_deg_res = get_degree_counts(G, an)
    # print(in_deg_res, out_deg_res)
    G1 = create_graph_for_snap(an, 'AnOutput.txt')

    # snap_traids = snap.GetTriads(G1)
    # triads = nx.transitivity(G)

    pagerank = nx.pagerank(G)
    max_pagerank = key_with_max_val(pagerank)
    import operator
    a = sorted(pagerank.items(), key=operator.itemgetter(1), reverse=True)
    print max_pagerank

    centrality = nx.in_degree_centrality(G)
    a = sorted(centrality.items(), key=operator.itemgetter(1), reverse=True)

    eigen_vector_centrality = nx.eigenvector_centrality(G)
    a = sorted(eigen_vector_centrality.items(), key=operator.itemgetter(1), reverse=True)

    # snap_dia = snap.GetBfsFullDiam(G1, 10)
    # dia = nx.diameter(G)
    avg_local_clustering_coeff = nx.average_clustering(G)
    print avg_local_clustering_coeff
    #global_clustering_coeff = snap.GetClustCf(G1, -1)
    #print global_clustering_coeff
    #plot_data = diameter_phase_transition()
    pass
    def test_K5(self):
        """Eigenvector centrality: K5"""
        G = nx.complete_graph(5)
        b = nx.eigenvector_centrality(G)
        v = math.sqrt(1 / 5.0)
        b_answer = dict.fromkeys(G, v)
        for n in sorted(G):
            assert_almost_equal(b[n], b_answer[n])
        nstart = dict([(n, 1) for n in G])
        b = nx.eigenvector_centrality(G, nstart=nstart)
        for n in sorted(G):
            assert_almost_equal(b[n], b_answer[n])

        b = nx.eigenvector_centrality_numpy(G)
        for n in sorted(G):
            assert_almost_equal(b[n], b_answer[n], places=3)
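# Note (added): in the complete graph K5 every node is structurally identical,
# so the leading eigenvector of the adjacency matrix is uniform. With the
# Euclidean normalisation used by nx.eigenvector_centrality, each of the five
# equal entries x satisfies 5 * x**2 = 1, i.e. x = sqrt(1/5), which is the
# value `v` asserted above.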
def eigenvectorcentralitynx(mutualinformation,startingvector):
    #Identical to eigenvectorcentralitynx0, but requires an additional argument startingvector.
    #starting vector provides an initial guess for the eigen vector centrality of all nodes.
    #startingvector must be a python dictionary. key = node, value = eigenvector centrality estimate.
    G=nx.Graph(mutualinformation)
    eigvcent=nx.eigenvector_centrality(G, weight='weight',max_iter=2000,nstart=startingvector)
    return eigvcent
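# Usage sketch (added for illustration): warm-starting the power iteration
# with a previous result, which is what the nstart argument above enables.
# The mutual-information matrix here is synthetic.
import numpy as np
import networkx as nx

mi = np.random.rand(20, 20)
mi = (mi + mi.T) / 2.0      # make the synthetic matrix symmetric
np.fill_diagonal(mi, 0.0)

first = nx.eigenvector_centrality(nx.Graph(mi), weight='weight', max_iter=2000)
# reuse the previous estimate as the starting vector for a perturbed matrix
second = eigenvectorcentralitynx(mi * 1.01, first)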
def node_eigenvector_centrality(X):
    """
    based on networkx function: eigenvector_centrality
    """
    XX = np.zeros((X.shape[0], int(np.sqrt(X.shape[1]))))
    for i, value in enumerate(X):
        adj_mat = value.reshape((int(np.sqrt(len(value))), -1))
        adj_mat = (adj_mat - np.min(adj_mat)) / (np.max(adj_mat) - np.min(adj_mat))
        adj_mat = 1 - adj_mat

#        th = np.mean(adj_mat) - 0.2
#        adj_mat = np.where(adj_mat < th, adj_mat, 0.)

        percent, th, adj_mat, triu = percentage_removed(adj_mat, 0.78)
        print("percent = {0}, threshold position = {1}, threshold = {2}\n".format(percent, th, triu[th]))

        g = nx.from_numpy_matrix(adj_mat)
        print "Graph Nodes = {0}, Graph Edges = {1} ".format(g.number_of_nodes(), g.number_of_edges())
        print "\nEdge kept ratio, {0}".format(float(g.number_of_edges())/((g.number_of_nodes()*(g.number_of_nodes()-1))/2))

        deg_cent = nx.eigenvector_centrality(g, max_iter=10000)
        node_cent = np.zeros(g.number_of_nodes())

        for k in deg_cent:
            node_cent[k] = deg_cent[k]
        XX[i] = node_cent
        print "graph {0} => mean {1}, min {2}, max {3}".format(i, np.mean(XX[i]), np.min(XX[i]), np.max(XX[i]))
#    XX = XX*100
    ss = StandardScaler()
    XX = ss.fit_transform(XX.T).T

    return XX
def _calc_centrality_totals(graph):
    """
    Calculates the eigenvector centrality for every node in
    a graph, then assigns those centralities to different
    demographic groups.

    @param graph: the graph to calculate centrality for
    @return a dict mapping gender string to list of centralities
    @return a dict mapping major name to list of centralities
    @return a dict mapping activity name to list of centralities
    """
    eigen_centralities = nx.eigenvector_centrality(graph)
    gender_eigen_totals = {}
    major_eigen_totals = {}
    ec_eigen_totals = {}
    for node in graph.nodes(data=True):
        gender = node[1]['gender']
        major = node[1]['area_of_study']
        extra_currics = node[1]['extra_curricular']
        if gender in gender_eigen_totals:
            gender_eigen_totals[gender].append(eigen_centralities[node[0]])
        else:
            gender_eigen_totals[gender] = [eigen_centralities[node[0]]]
        if major in major_eigen_totals:
            major_eigen_totals[major].append(eigen_centralities[node[0]])
        else:
            major_eigen_totals[major] = [eigen_centralities[node[0]]]
        for ec in extra_currics:
            if ec in ec_eigen_totals:
                ec_eigen_totals[ec].append(eigen_centralities[node[0]])
            else:
                ec_eigen_totals[ec] = [eigen_centralities[node[0]]]

    return gender_eigen_totals, major_eigen_totals, ec_eigen_totals
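# Usage sketch (added; hypothetical data): a tiny graph carrying the node
# attributes the function above expects.
import networkx as nx

g_demo = nx.Graph()
g_demo.add_node('alice', gender='F', area_of_study='CS', extra_curricular=['chess'])
g_demo.add_node('bob', gender='M', area_of_study='Math', extra_curricular=['chess', 'band'])
g_demo.add_node('carol', gender='F', area_of_study='CS', extra_curricular=[])
g_demo.add_edges_from([('alice', 'bob'), ('bob', 'carol'), ('alice', 'carol')])

by_gender, by_major, by_activity = _calc_centrality_totals(g_demo)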
def attack_based_max_eigenvector(G):
    """ Recalculate eigenvector centrality attack
    """
    n = G.number_of_nodes()
    tot_ND = [0] * (n+1)
    tot_T = [0] * (n+1)

    ND, ND_lambda = ECT.get_number_of_driver_nodes(G)
    tot_ND[0] = ND
    tot_T[0] = 0

    for i in range(1, n+1):
        # calculate all nodes' eigenvector centrality
        allEigenvectorCentrality = nx.eigenvector_centrality(G, max_iter=1000, weight=None)
        # get node with max eigenvector centrality       
        node = max(allEigenvectorCentrality, key=allEigenvectorCentrality.get)
        # remove all the edges adjacent to node
        if not nx.is_directed(G):   # undirected graph
            for key in G[node].keys():
                G.remove_edge(node, key)
        else:   # directed graph
            for x in [v for u, v in G.out_edges_iter(node)]:
                G.remove_edge(node, x)
            for x in [u for u, v in G.in_edges_iter(node)]:
                G.remove_edge(x, node)
        ND, ND_lambda = ECT.get_number_of_driver_nodes(G)
        tot_ND[i] = ND
        tot_T[i]  = i
    return (tot_ND, tot_T)
 def print_most_often_optimal(self, bests):
   ''' Print those cities which are most often in optimal layouts. '''
   print("The centrality measure previously discussed is a good judge of " +
     "how good building a research center in a city is, but let's check our " +
     "work by counting how many times each city appears in the optimal " +
     "placements.")
   occurences = [item for sublist in bests for subsublist in
     sublist for item in subsublist]
   cities = list(self)
   for city in sorted(cities, key=occurences.count):
     if occurences.count(city):
       print(city, occurences.count(city))
   lone = []
   ltwo = []
   centrality = nx.eigenvector_centrality(self, max_iter=1000)
   for city in cities:
     lone.append(occurences.count(city))
     ltwo.append(centrality[city])
   (corr, pvalue) = stats.pearsonr(lone, ltwo)
   print ("The correlation is " + str((corr, pvalue)))
   lone = []
   ltwo = []
   for city in cities:
     if city != "Atlanta":
       lone.append(occurences.count(city))
       ltwo.append(centrality[city])
   (corr, pvalue) = stats.pearsonr(lone, ltwo)
   print ("The correlation without Atlanta is " + str((corr, pvalue)))
    def _graph_centrality_measures(self, df_totals):
        '''
        INPUT: DataFrame
        OUTPUT: dict, dict, dict

        For every participant, calculates degree centrality, Eigenvector centrality, and
        weighted Eigenvector centrality (the last being weighted by the df's 'cnt' column).
        '''
        df = df_totals.copy()
        df = df[df['participantID'] > df['participantID.B']]
        G = from_pandas_dataframe(df, 'participantID', 'participantID.B', 'cnt')
        degree_centrality = nx.degree_centrality(G)
        eigen_centrality = nx.eigenvector_centrality(G)
        eigen_centrality_weighted = nx.eigenvector_centrality(G, weight='cnt')

        return degree_centrality, eigen_centrality, eigen_centrality_weighted
    def __init__(self, time, voteomat):

        self.foldername = voteomat.network_func_name + voteomat.distribution_func_name
        self.foldertime = time
        self.path = "Statistics//"+self.foldername+"//"
        self.path += g_candidates_affecting_nodes + "=" + str(voteomat.candidates_affecting) + "_"
        self.path += g_candidates_affected_by_median + "=" + str(voteomat.candidates_affected) + "_"
        self.path += g_neighbours_affecting_each_other + "=" + str(voteomat.affecting_neighbours) + "_"
        self.path += g_counterforce_affecting_candidates + "=" + str(voteomat.counter_force_affecting) + "_"
        self.path += "counterforce_left="+str(voteomat.counter_force_left)+"_"+"counterforce_right="+str(voteomat.counter_force_right)+ "_" + time
        self.make_sure_path_exists(self.path)
        self.file = open(self.path + "//statistic.csv", 'w')
        self.statistic = {}
        self.statistic["networkfunc"] = voteomat.network_func_name
        self.statistic["distributionfunc"] = voteomat.distribution_func_name
        self.statistic["acceptance"] = voteomat.acceptance
        median, avg, std = voteomat.get_statistic()
        self.statistic["median"] = []
        self.statistic["median"].append(median)
        self.statistic["avg"] = []
        self.statistic["avg"].append(avg)
        self.statistic["std"] = []
        self.statistic["std"].append(std)


        self.statistic["node_with_highest_degree_centrality"] = []
        self.max_degree_node = max( nx.degree_centrality(voteomat.get_network()).items(),key = lambda x: x[1])[0]

        self.statistic["node_with_highest_degree_centrality"].append(voteomat.get_network().nodes(data = True)[self.max_degree_node][1]["orientation"])
        self.statistic["node_with_minimum_degree_centrality"] = []
        self.min_degree_node = min(nx.degree_centrality(voteomat.get_network()).items(), key = lambda x: x[1])[0]
        self.statistic["node_with_minimum_degree_centrality"].append(voteomat.get_network().nodes(data = True)[self.min_degree_node][1]["orientation"])
        self.statistic["node_with_highest_closeness_centrality"] = []
        self.max_closeness_node = max( nx.closeness_centrality(voteomat.get_network()).items(),key = lambda x: x[1])[0]
        self.statistic["node_with_highest_closeness_centrality"].append(voteomat.get_network().nodes(data = True)[self.max_closeness_node][1]["orientation"])
        self.statistic["node_with_highest_betweenness_centrality"] = []
        self.max_betweenness_node = max(nx.betweenness_centrality(voteomat.get_network()).items() ,key = lambda x: x[1])[0]
        self.statistic["node_with_highest_betweenness_centrality"].append(voteomat.get_network().nodes(data = True)[self.max_betweenness_node][1]["orientation"])
        try:
            self.statistic["node_with_highest_eigenvector_centrality"] = []
            self.max_eigenvector_node = max( nx.eigenvector_centrality(voteomat.get_network(), max_iter = 1000).items(),key = lambda x: x[1])[0]
            self.statistic["node_with_highest_eigenvector_centrality"].append(voteomat.get_network().nodes(data = True)[self.max_eigenvector_node][1]["orientation"])
        except nx.NetworkXError:
            print "Eigenvector centrality not possible."

        freeman = self.freeman_centrality([x[1] for x in nx.degree_centrality(voteomat.get_network()).items()], max( nx.degree_centrality(voteomat.get_network()).items(),key = lambda x: x[1])[1])
        self.statistic["freeman_centrality"] = round(freeman,2)

        self.statistic["affecting_neighbours"] = voteomat.affecting_neighbours
        self.statistic["affecting_candidates"] = voteomat.candidates_affecting
        self.statistic["affected_canddiates"] = voteomat.candidates_affected
        self.statistic["affecting_counter_force"] = voteomat.counter_force_affecting
        self.statistic["affecting_counter_force_left"] = voteomat.counter_force_left
        self.statistic["affecting_counter_force_right"] = voteomat.counter_force_right

        self.statistic["candidates"] = []
        for candidate in voteomat.candidates:
            self.statistic["candidates"].append(candidate.to_save())
        self.statistic["network"] = voteomat.get_network().nodes(data=True);
def main():
    G = nx.Graph()

    folderKorpus = os.path.abspath('.') + '\\tempo-txt'
    #folderKorpus = 'tempo-txt'
    '''
    for root, files, dirs in os.walk(folderKorpus):
        for name in files:
            print(os.path.join(root, name))
    '''

    ctrBerkas = 0
    sentTokenizer = nltk.data.load('tokenizers/punkt/english.pickle')

    daftarBerkas = grab_files(folderKorpus)
    termList = []
    for berkas in daftarBerkas:
        data = open(berkas)
        ctrBerkas += 1
        ctrBaris = 0
        for baris in data:
            ctrBaris += 1
            sents = sentTokenizer.tokenize(baris)
            for sent in sents:
                kalimat = sent.replace('\n', ' ').strip()
                if len(kalimat) > 0:
                    kalimat = kalimat.lower()
                    tokens = nltk.word_tokenize(kalimat)
                    #print ctrBaris, len(tokens), tokens
                    akhirKalimat = tokens[len(tokens)-1]
                    '''
                    if akhirKalimat != '.':
                        print(berkas)
                        print(ctrBaris)
                        #print(kalimat)
                        print(akhirKalimat)
                    '''
                    if tokens[0] not in termList:
                        termList.append(tokens[0])
                        #print(tokens[0])
                        #print(ctrBerkas, len(termList))
                    for idx in range(1,len(tokens)-1):
                        G.add_edge(tokens[idx-1], tokens[idx])
                        if tokens[idx] not in termList:
                            termList.append(tokens[idx])
                            #print(tokens[idx])
                            #print(ctrBerkas, len(termList))
                    #text = nltk.Text(tokens)
        data.close()
    pprint.pprint(tokens)
    '''
    nx.draw(G)
    plt.show()
    '''
    print("%d berkas diolah" % ctrBerkas)
    print("%d term diolah" % len(termList))

    ce = nx.eigenvector_centrality(G)
    print(sorted(['%0.2f %s'%(ce[node], node) for node in ce]))
    def eigenvector_centrality_ranking(self):
        try:
            results = nx.eigenvector_centrality(self.graph)
        except nx.NetworkXError:
            print('Eigenvector error')
            results = {}

        return self.create_ranking(results)
 def test_multigraph(self):
     with pytest.raises(nx.NetworkXException):
         e = nx.eigenvector_centrality(nx.MultiGraph())
def createNetwork():
    d = dirname(dirname(abspath(__file__))) + '/dataset/'

    with open(d + 'status_user_dict.json', 'r') as infile:
        status_user_dict = json.load(infile)

    with open(d + 'user_retweeter_dict.json', 'r') as infile:
        user_retweeter_dict = json.load(infile)

    # with open(d + 'useridtype_dict_revised.json', 'r') as infile:
    #     useridtype_dict = json.load(infile)

    with open(d + 'user_id_dict.json', 'r') as jsonfile:
        user_id_dict = json.load(jsonfile)

    with open(d + 'user_type_dict.json', 'r') as jsonfile:
        user_type_dict = json.load(jsonfile)

    user_id_dict = {k: int(v) for k, v in user_id_dict.items()}
    # user_id_dict["michaelianblack"] = 21035409
    # user_id_dict["TheEllenShow"] =  15846407
    # user_id_dict["CraigyFerg"] = 112508240
    # user_id_dict["bheater"] = 15741636
    # user_id_dict["hodgman"] = 14348594

    for name, id in list(user_id_dict.items()):
        user_id_dict[id] = name

    # print("Number of users with status:", len(set(status_user_dict.values())))
    userset = set(status_user_dict.values())
    interuser_retweeterdict = defaultdict(
        list, {
            int(u): set([e for e in li if e in userset])
            for u, li in user_retweeter_dict.items()
        })
    # print([len(v) for v in interuser_retweeterdict.values()])

    G = nx.DiGraph()
    for sourceuser, retuserlist in interuser_retweeterdict.items():
        updateGraph(G, sourceuser, retuserlist)

    nodelist1 = user_type_dict["1"]
    nodelist2 = user_type_dict["2"]

    plt.figure()
    pos = nx.spring_layout(G)

    nx.draw_networkx_nodes(G,
                           pos,
                           nodelist=[e for e in G.nodes() if e in nodelist1],
                           node_size=100,
                           cmap=plt.get_cmap('jet'),
                           node_color='red')
    nx.draw_networkx_nodes(
        G,
        pos,
        nodelist=[e for e in G.nodes() if e not in nodelist1],
        node_size=100,
        cmap=plt.get_cmap('jet'),
        node_color='blue')

    # nx.draw_networkx_labels(G, pos, labels = user_id_dict)
    # nx.draw(G)
    nx.draw_networkx_edges(G, pos, edge_color='k', arrows=False)
    plt.show()

    # calcualte centrality
    centrality_eigen = nx.eigenvector_centrality(G)
    print(
        sorted([(user_id_dict[node], centrality_eigen[node])
                for node in centrality_eigen],
               key=lambda x: x[1],
               reverse=True))
    centrality_degree = nx.degree_centrality(G)
    print(
        sorted([(user_id_dict[node], centrality_degree[node])
                for node in centrality_degree],
               key=lambda x: x[1],
               reverse=True))
    centrality_indegree = nx.in_degree_centrality(G)
    print(
        sorted([(user_id_dict[node], centrality_indegree[node])
                for node in centrality_indegree],
               key=lambda x: x[1],
               reverse=True))
    centrality_outdegree = nx.out_degree_centrality(G)
    print(
        sorted([(user_id_dict[node], centrality_outdegree[node])
                for node in centrality_outdegree],
               key=lambda x: x[1],
               reverse=True))

    indegree_outdegree_node_tuple = [(centrality_indegree[k],
                                      centrality_outdegree[k], user_id_dict[k])
                                     for k in G.nodes()]
    print(sorted(indegree_outdegree_node_tuple, reverse=True))
    plt.figure(2)
    plt.scatter([e[0] for e in indegree_outdegree_node_tuple],
                [e[1] for e in indegree_outdegree_node_tuple])
    # plt.
    plt.show()
 def test_empty(self):
     with pytest.raises(nx.NetworkXException):
         e = nx.eigenvector_centrality(nx.Graph())
facebook_net = build_facebook_net()

#Degree centrality top 10
deg = nx.degree(facebook_net)
deg_sorted = sorted(deg.items(), key=operator.itemgetter(1), reverse=True)
print("Top 10 degree centrality (node, centrality): ", deg_sorted[0:9])
#Closeness centrality top 10
clo = nx.closeness_centrality(facebook_net)
clo_sorted = sorted(clo.items(), key=operator.itemgetter(1), reverse=True)
print("Top 10 closeness centrality (node, centrality): ", clo_sorted[0:9])
#Betweenness centrality top 10
bet = nx.betweenness_centrality(facebook_net)
bet_sorted = sorted(bet.items(), key=operator.itemgetter(1), reverse=True)
print("Top 10 betweenness centrality (node, centrality): ", bet_sorted[0:9])
#Eigenvector centrality top 10
eig = nx.eigenvector_centrality(facebook_net)
eig_sorted = sorted(eig.items(), key=operator.itemgetter(1), reverse=True)
print("Top 10 eigenvector centrality (node, centrality): ", eig_sorted[0:9])
#Pagerank centrality top 10
pag = nx.pagerank(facebook_net)
pag_sorted = sorted(pag.items(), key=operator.itemgetter(1), reverse=True)
print("Top 10 pagerank centrality (node, centrality): ", pag_sorted[0:9])

#Trim network to only show nodes with more than 1 connection
facebook_net_trimmed = facebook_net.copy()
for n in list(facebook_net_trimmed.nodes()):
    if deg[n] < 2:
        facebook_net_trimmed.remove_node(n)

#View all cliques
cliques = list(nx.find_cliques(facebook_net_trimmed))
    str(userToID[int(ed[0])]) + ',' + str(userToID[int(ed[1])]) for ed in edges
    if int(ed[0]) in userToID and int(ed[1]) in userToID
]
g = nx.parse_edgelist(edgeList,
                      delimiter=',',
                      create_using=nx.DiGraph(),
                      nodetype=int)

missing_nodes = [int(uid) for uid in IDtoUser if int(uid) not in g.nodes()]
g.add_nodes_from(missing_nodes)

adjMat = nx.adjacency_matrix(g)

degCent = nx.in_degree_centrality(g)
degCent = [degCent[int(uid)] for uid in IDtoUser]
eigenCent = nx.eigenvector_centrality(g)
eigenCent = [eigenCent[int(uid)] for uid in IDtoUser]

# simMat = [ float(np.dot(R[int(i),:],R[int(j),:])) / (1+math.sqrt(np.sum(R[int(i),:][np.where(R[int(j),:]>0)[0]]**2)*np.sum(R[int(j),:][np.where(R[int(i),:]>0)[0]]**2))) for uid in IDtoUser.keys() for i,j in zip( [uid]*len(IDtoUser.keys()), IDtoUser.keys() ) ]
# print(len(simMat))

# simMat = np.reshape(simMat, (len(IDtoUser.keys()),len(IDtoUser.keys())))

# print(simMat.shape)

# with h5py.File('sim_pairs.h5', 'w') as hf:
# 	hf.create_dataset('S', data=simMat)

simMat = None
with h5py.File('sim_pairs.h5', 'r') as hf:
    simMat = hf['S'][:]
def get_graph_metrics(connectivity_vector) :
    
    # reshape into matrix
    connectivity_matrix = np.reshape(connectivity_vector, (90, 90))
    
    # convert to networkx graph
    connectivity_graph = nwx.from_numpy_matrix(connectivity_matrix)
    
    # convert to distance graph as some metrics need this instead
    distance_matrix = connectivity_matrix
    distance_matrix[distance_matrix == 0] = np.finfo(np.float32).eps
    distance_matrix = 1.0 / distance_matrix
    distance_graph = nwx.from_numpy_matrix(distance_matrix)
    
    # intialise vector of metrics
    metrics = np.zeros((21,))
    # fill the vector of metrics
    # 1 and 2: degree distribution
    degrees = np.sum(connectivity_matrix, axis = 1)
    metrics[0] = np.mean(degrees)
    metrics[1] = np.std(degrees)
    
    # 3 and 4: weight distribution
    weights = np.tril(connectivity_matrix, k = -1)
    metrics[2] = np.mean(weights)
    metrics[3] = np.std(weights)

    # 5: average shortest path length
    # transform weights to distances so this makes sense    
    metrics[4] = nwx.average_shortest_path_length(distance_graph, weight='weight')

    # 6: assortativity
    metrics[5] = nwx.degree_assortativity_coefficient(connectivity_graph, weight=None)
    
    # 7: clustering coefficient
    metrics[6] = nwx.average_clustering(connectivity_graph, weight='weight')
    
    # 8: transitivity
    metrics[7] = nwx.transitivity(connectivity_graph)
    
    # 9 & 10: local and global efficiency
    metrics[8] = np.mean(bct.efficiency_wei(connectivity_matrix, local=True))
    metrics[9] = bct.efficiency_wei(connectivity_matrix, local=False)
    
    # 11: Clustering coefficient
    metrics[10] = np.mean(nwx.clustering(connectivity_graph, weight='weight').values())
    
    # 12 & 13: Betweeness centrality
    metrics[11] = np.mean(nwx.betweenness_centrality(distance_graph, weight='weight').values())
    metrics[12] = np.mean(nwx.current_flow_betweenness_centrality(distance_graph, weight='weight').values())
    
    # 14: Eigenvector centrality
    metrics[13] = np.mean(nwx.eigenvector_centrality(distance_graph, weight='weight').values())
    
    # 15: Closeness centrality
    metrics[14] = np.mean(nwx.closeness_centrality(distance_graph, distance='weight').values())
    
    # 16: PageRank
    metrics[15] = np.mean(nwx.pagerank(connectivity_graph, weight='weight').values())
    
    # 17: Rich club coefficient
    metrics[16] = np.mean(nwx.rich_club_coefficient(connectivity_graph).values())
    
    # 18: Density    
    metrics[17] = bct.density_und(connectivity_matrix)[0]
    
    # 19, 20, 21: Eccentricity, radius, diameter
    spl_all = nwx.shortest_path_length(distance_graph, weight='weight')
    eccs = np.zeros(90,)
    for i in range(90) :
        
        eccs[i] = np.max(spl_all[i].values())
        
    metrics[18] = np.mean(eccs)
    metrics[19] = np.min(eccs)
    metrics[20] = np.max(eccs)  
    
    return metrics
def main():
    """ Models the whole dataset using features and output to a file. 

      Args:
        None.

      Returns:
        None.
  """
    for i in xrange(1, 5):
        print 'Reading data'
        trusts = load(open('%s/trusts.pkl' % _PKL_DIR, 'r'))
        reviews = load(open('%s/reviews-%d.pkl' % (_PKL_DIR, i), 'r'))
        users = load(open('%s/users-%d.pkl' % (_PKL_DIR, i), 'r'))
        train = load(open('%s/train-%d.pkl' % (_PKL_DIR, i), 'r'))
        validation = load(open('%s/validation-%d.pkl' % (_PKL_DIR, i), 'r'))
        test = load(open('%s/test-%d.pkl' % (_PKL_DIR, i), 'r'))
        sim = load(open('%s/sim-%d.pkl' % (_PKL_DIR, i), 'r'))
        conn = load(open('%s/conn-%d.pkl' % (_PKL_DIR, i), 'r'))

        print 'Generating similarity'
        avg_user = compute_avg_user(users)
        close = {}
        eigen = eigenvector_centrality(trusts)
        for author, voter in sim:
            author_dic = users[author] if author in users else avg_user
            voter_dic = users[voter] if voter in users else avg_user
            # if any feature is nan, the derivated becomes nan and will be imputated
            sim[(author, voter)]['diff_trustors'] = author_dic['num_trustors'] - \
                voter_dic['num_trustors']
            sim[(author, voter)]['diff_reviews'] = author_dic['num_reviews'] - \
                voter_dic['num_reviews']
            sim[(author, voter)]['diff_pagerank'] = author_dic['pagerank'] - \
                voter_dic['pagerank']
            if voter not in close:
                close[voter] = closeness_centrality(trusts, voter) if voter in trusts \
                    else nan
            if author not in close:
                close[author] = closeness_centrality(trusts, author) if author in trusts\
                    else nan
            sim[(author, voter)]['diff_close'] = close[author] - close[voter]
            if voter not in eigen:
                eigen[voter] = nan
            if author not in eigen:
                eigen[author] = nan
            conn[(author, voter)]['diff_eigen'] = eigen[author] - eigen[voter]
        dump(sim, open('%s/new-sim-%d.pkl' % (_PKL_DIR, i), 'w'))

        print 'Generating connection'
        paths = {}
        for author, voter in conn:
            conn[(author, voter)]['voter_trust'] = 1 if \
                trusts.has_edge(voter, author) else 0
            conn[(author, voter)]['author_trust'] = 1 if \
                trusts.has_edge(author, voter) else 0
            if voter not in paths and voter in trusts:
                paths[voter] = single_source_shortest_path_length(
                    trusts, voter)
            if author not in paths and author in trusts:
                paths[author] = single_source_shortest_path_length(
                    trusts, author)
            conn[(author, voter)]['inv_from_vot_path'] = 0 if voter not in trusts \
                or author not in paths[voter] else (1.0 / float(paths[voter][author]))
            conn[(author, voter)]['inv_from_aut_path'] = 0 if author not in trusts \
                or voter not in paths[author] else (1.0 / float(paths[author][voter]))
        dump(conn, open('%s/new-conn-%d.pkl' % (_PKL_DIR, i), 'w'))
def hbase_test():

    # Default return value
    default_return = {'nodes': [], 'edges': []}

    # Input sanity checks
    search = request.args.get('search', '')
    if search == None or search == "":
        print "SD> WARN: Search query is empty"
        return default_return
    elif isinstance(search, str):
        if search.isdigit():
            search = int(search)
        else:
            print "SD> WARN: Search should be a digit"
            return default_return

    search_str = str(search)
    # Establish contact with database
    cluster = Cluster(contact_points=['54.219.144.56'], )
    session = cluster.connect('harary')

    # Look for node in database
    community_id = session.execute(
        "SELECT community FROM node_community_table WHERE source = " +
        search_str)
    if len(community_id) == 0:
        print "SD> WARN: Could not find node " + search_str + " in database"
        return default_return

    community_str = str(community_id[0].community)
    print "SD> INFO: Node " + search_str + " was found in database with community " + community_str

    # Search for community members
    print "SD> INFO: Executing query: " + "SELECT * FROM node_community_table WHERE community = " + community_str + " ALLOW FILTERING"
    result = session.execute(
        "SELECT * FROM node_community_table WHERE community = " +
        community_str + " ALLOW FILTERING;")
    print "SD> INFO: Query result: " + str(
        len(result)) + " members were found for community " + community_str

    # Empty result scenario
    if len(result) == 0:
        return default_return

    # Extreme cases are truncated for practicality
    max_number_of_nodes = 2000
    if len(result) > max_number_of_nodes:
        print "SD> WARN: Excessive number of node (%i). Something is probably wrong.." % len(
            result)
        result = result[0:max_number_of_nodes]

    node_index = 0
    edge_index = 0

    # Allocate the number of nodes
    expected_number_of_nodes = len(result)
    nodes = [{
        'id': '0',
        'index': '0',
        'label': '',
        'community': 0,
        'x': 0,
        'y': 0,
        'size': 10
    } for k in range(expected_number_of_nodes)]

    # Sigma.js
    # edges = [{'id': '0', 'source':'0', 'target':'0'} for k in range(expected_number_of_nodes * expected_number_of_nodes)]
    # D3
    edges = [{
        'source': 100,
        'target': 1000,
        'id': 0
    } for k in range(expected_number_of_nodes * expected_number_of_nodes)]

    # Filter for visualization
    def filter(x):
        # Skip rows without a target list (len(None) would fail) and overly connected nodes
        return x.target is not None and len(x.target) < 50

    # Map ID to linear range for D3
    keys = [r.source for r in result if filter(r)]
    values = range(len(keys))
    dictionary = dict(zip(keys, values))

    # Add all nodes
    for node in result:
        if filter(node):
            nodes[node_index]['id'] = str(dictionary[node.source])
            nodes[node_index]['index'] = str(node.source)
            nodes[node_index]['community'] = node.community
            nodes[node_index]['label'] = "Node: " + str(node.source)
            nodes[node_index]['x'] = random.random()
            nodes[node_index]['y'] = random.random()
            node_index = node_index + 1
            if node.target is not None:
                # Add all edges
                for target in node.target:
                    if target in keys:
                        edges[edge_index]['source'] = dictionary[node.source]
                        edges[edge_index]['target'] = dictionary[target]
                        edges[edge_index]['id'] = str(edge_index)
                        edge_index = edge_index + 1

    # Truncate excess
    nodes = nodes[0:node_index]
    edges = edges[0:edge_index]

    # Build graph from json
    G = json_graph.node_link_graph({
        'nodes': nodes,
        'links': edges
    }, False, True)
    DiG = nx.DiGraph(G)
    G = nx.Graph(G)

    # On the fly computation of properties on manageable sizes
    bet_cen = nx.betweenness_centrality(G)
    clo_cen = nx.closeness_centrality(G)
    eig_cen = nx.eigenvector_centrality(G)
    pr = nx.pagerank(DiG, alpha=0.9)
    deg = G.degree()
    com = community.best_partition(G)

    for node in nodes:
        node['betweenness'] = bet_cen[node['id']]
        node['closeness'] = clo_cen[node['id']]
        node['eigenvector'] = eig_cen[node['id']]
        node['pagerank'] = pr[node['id']]
        node['degree'] = deg[node['id']]
        node['community'] = com[node['id']]

    # Return json string
    return json.dumps({'nodes': nodes, 'edges': edges})
Example n. 39
                 arrows=False,
                 with_labels=True,
                 node_size=[
                     200 + ((averagee_bacon - rg.node[d]['bacon']) * 50)
                     for d in rg.nodes
                 ],
                 width=0.1,
                 style='dashed',
                 cmap=plt.get_cmap("viridis_r"),
                 node_color=[(averagee_bacon - rg.node[d]['bacon'])
                             for d in rg.nodes],
                 label=seed)
fig.savefig('fig/eigenvalues')
fig.clear()

eigenvector_centrality = nx.eigenvector_centrality(rg, max_iter=300)
ord_lc = sorted(eigenvector_centrality.items(),
                key=itemgetter(1),
                reverse=True)
labels = {}
for k, v in ord_lc[0:5]:
    labels[k] = k

fig = plt.figure(num=None,
                 figsize=(15, 10),
                 dpi=80,
                 facecolor='w',
                 edgecolor='k')
nx.draw_networkx(rg,
                 pos=pos_a,
                 arrows=False,
Example n. 40
 def getEigenvectorC(self):
     mc = self.getMainComponent()
     return nx.eigenvector_centrality(mc)
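getMainComponent is not shown in this snippet; the following is a hedged sketch of what such a helper might do (an assumption, not the source's implementation), for an undirected NetworkX graph:

import networkx as nx

def get_main_component(graph):
    # Hypothetical helper mirroring getMainComponent above: return the largest
    # connected component as a standalone subgraph copy (undirected graphs only).
    largest = max(nx.connected_components(graph), key=len)
    return graph.subgraph(largest).copy()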
 def test_eigenvector_centrality_unweighted(self):
     G = self.H
     p = networkx.eigenvector_centrality(G, tol=1.e-08)
     for (a, b) in zip(p.values(), self.G.evc):
         assert_almost_equal(a, b)
def top_n_evcentrality(graph, n=10):
    centrality = nx.eigenvector_centrality(graph)
    # sort on the numeric centrality value, then format for display
    sorted_ev = sorted(centrality.items(), key=itemgetter(1), reverse=True)
    return [(v, '{:0.2f}'.format(c)) for v, c in sorted_ev[:n]]
Example n. 43
for i in M.nodes():
    M.add_node(i, group=nodes[i]['group'])
    M.add_node(i, name=nodes[i]['name'])

#Write out graph data in JSON file
jsonData = json_graph.node_link_data(M)
with open('data/miserables.json', 'w') as outfile:
    json.dump(jsonData, outfile, indent=4)

#print "list of nodes: "
#print M.nodes(data = True)
#print "list of edges: "
#print M.edges(data = True)

#Eigenvector centrality criteria
Meigen = nx.eigenvector_centrality(M)
normeigen = [float(i) / max(Meigen.values()) for i in Meigen.values()]

#Closeness centrality
Mclose = nx.closeness_centrality(M)
normclose = Mclose.values()

#Betweenness centrality
Mbetween = nx.betweenness_centrality(M)
normbetween = Mbetween.values()

#Graph edges in list form
Medges = [i for i in M.edges()]

#Layout
pos = nx.fruchterman_reingold_layout(M, dim=2)
Example n. 44
#Genre=['c','d','c','c','c','c','d','d','d','c','d','d','d','d','d','d','d','c']
#TestSet

os.chdir('/home/kel/Desktop/SocialNetworkAnalysis/')
fileE = pd.read_csv("flickrEdges_adj.tsv", sep='\t')
#read the edge list from the data file
Title = fileE.columns
print(Title)
fileE = fileE.rename(columns={
    Title[0]: 'Source',
    Title[1]: 'Target',
    Title[2]: 'Degree'
})
Gs = nx.from_pandas_edgelist(fileE, source='Source', target='Target')
Ga = Gs.to_directed()
centrality = nx.eigenvector_centrality(Ga, max_iter=20)
sorted((v, f"{c:0.2f}") for v, c in centrality.items())
fileE['Eigenvalue'] = np.nan
#for number in fileE['Source']:
#    value=centrality.get(number)
#    fileE['Eigenvalue'][np.where(fileE.Source==number)[0]]=value
#    fileE['Eigenvalue'] = np.where(fileE.Source==number, value,fileE['Eigenvalue'])
eigenvalues = pd.DataFrame(centrality.items())

n = len(fileE['Source'])
dataS = fileE['Source']
dataE = fileE['Eigenvalue']
dataD = fileE['Degree']
dataE = np.array(dataE).reshape((len(dataE), 1))
dataS = np.array(dataS).reshape((len(dataS), 1))
Data = np.hstack((dataE, dataS))
 def test_eigenvector_centrality_unweighted(self):
     G = self.H
     p = nx.eigenvector_centrality(G)
     for (a, b) in zip(list(p.values()), self.G.evc):
         assert a == pytest.approx(b, abs=1e-4)
Example n. 46
    print degreeCentrality

    print "harmonic centrality"
    for index in range(0, 16):
        if (index != 11):
            total = 0  # sum of shortest-path distances from this vertex
            for index2 in range(0, 16):
                if (index != index2 and index != 11
                        and index2 != 11):  #again, don't try for vertex 11
                    total += networkx.shortest_path_length(graph, index, index2)
            print (1 / float(total)) / 15
        else:
            print "0"

    print "eigenvector centrality"
    eigenvectorCentrality = networkx.eigenvector_centrality(graph)
    for index in range(0, 16):
        print eigenvectorCentrality[index]

#betweenness: the networkx command was not used here, so that multiple shortest paths can be handled explicitly
    print "betweenness centrality"

    for index in range(0, 16):
        counter = 0
        counter2 = 0
        for item in shortestPaths:
            for item2 in item:
                counter2 += 1
                if (index in item2):

                    counter += 1
 def get_eigen(self, n1, n2):
     self.eigen_centrality = nx.eigenvector_centrality(self.graph, n1, n2)
Example n. 48
def metrics(segments):
    """Calculates network metrics for play"""

    G = nx.Graph()

    weights = {}
    for seg in segments:
        speakers = seg.get('speakers', [])
        length = len(speakers)
        # if segment has only one speaker we add her as a node to make sure she
        # is included in the graph even if she has no connections
        if length == 1:
            G.add_node(speakers[0])
        for i in range(length):
            if i < length - 1:
                source = speakers[i]
                others = speakers[i + 1:length]
                for target in others:
                    edge = tuple(sorted((source, target)))
                    weights[edge] = weights.get(edge, 0) + 1

    G.add_weighted_edges_from([(n[0], n[1], w) for n, w in weights.items()])

    size = len(list(G.nodes))
    max_degree = max([d for n, d in G.degree()])
    max_degree_ids = [n for n, d in G.degree() if d == max_degree]

    path_lengths = [
        y for x in nx.shortest_path_length(G) for y in x[1].values() if y > 0
    ]

    nodes = {}
    wd = G.degree(None, 'weight')
    cc = nx.closeness_centrality(G)
    bc = nx.betweenness_centrality(G)
    # FIXME: nx.eigenvector_centrality throws an exception with
    # https://dracor.org/api/corpora/rus/play/lermontov-strannyj-chelovek
    # we catch this here so we can still yield the rest of the metrics.
    try:
        ec = nx.eigenvector_centrality(G)
    except nx.exception.PowerIterationFailedConvergence:
        ec = {}

    for n, d in G.degree():
        nodes[n] = {
            'degree': d,
            'weightedDegree': wd[n],
            'betweenness': bc[n],
            'closeness': cc[n]
        }
        if n in ec:
            nodes[n]['eigenvector'] = ec[n]

    return {
        'size':
        size,
        'density':
        nx.density(G),
        'diameter':
        max(path_lengths) if len(path_lengths) else 0,
        'averagePathLength':
        (sum(path_lengths) / len(path_lengths)) if len(path_lengths) else 0,
        'averageDegree':
        sum([d for n, d in G.degree()]) / size,
        'averageClustering':
        nx.average_clustering(G),
        'maxDegree':
        max_degree,
        'maxDegreeIds':
        max_degree_ids,
        'numConnectedComponents':
        nx.number_connected_components(G),
        'nodes':
        nodes
    }
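The FIXME above simply drops eigenvector values when power iteration fails to converge. A hedged alternative (an assumption, not what the source does) is to fall back to the dense numpy-based solver before giving up; it requires numpy/scipy but avoids the convergence issue:

try:
    ec = nx.eigenvector_centrality(G)
except nx.exception.PowerIterationFailedConvergence:
    try:
        # LAPACK-based dense eigensolver; sidesteps power-iteration convergence problems
        ec = nx.eigenvector_centrality_numpy(G)
    except Exception:
        ec = {}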
Example n. 49
def compute_graph_features():
    
    # Load data
    train = pd.read_csv("./data/train.csv", names=['row_ID', 'text_a_ID', 'text_b_ID', 'text_a_text', 'text_b_text', 'have_same_meaning'], index_col=0)
    test = pd.read_csv("./data/test.csv", names=['row_ID', 'text_a_ID', 'text_b_ID', 'text_a_text', 'text_b_text', 'have_same_meaning'], index_col=0)


    # Load weights for graph depending on which file is available
    if os.path.exists("./data/distance_features_train.csv") and os.path.exists("./data/distance_features_test.csv"):
        print("Use tfidf dist as weights for graph.")
        train_pred_features = pd.read_csv("./data/distance_features_train.csv")
        test_pred_features = pd.read_csv("./data/distance_features_test.csv")
        train["weight"] = train_pred_features["tfidf_dist_cosine"]
        test["weight"] = test_pred_features["tfidf_dist_cosine"]
        
    elif os.path.exists("./predictions/predictions_ensemble_train.csv") and os.path.exists("./predictions/predictions_ensemble_test.csv"):
        print("Use previous predictions as weights for graph.")
        train_pred_features = pd.read_csv("./predictions/predictions_ensemble_train.csv")
        test_pred_features = pd.read_csv("./predictions/predictions_ensemble_test.csv")
        train["weight"] = train_pred_features["weight"]
        test["weight"] = test_pred_features["weight"]
        
    else:
        print("Use uniform weights for graph.")
        train["weight"] = 1
        test["weight"] = 1


    # Hyperparameters
    max_freq = 50
    max_neighbors = 30
    n_k_cores = 12
    max_level = 3

    # Start computation
    all_question_ids = pd.concat([train["text_a_ID"], train["text_b_ID"], test["text_a_ID"], test["text_b_ID"]])
    unique_question_ids = all_question_ids.unique()

    def shortestPathShortness(row):
        g.remove_edge(row['text_a_ID'], row['text_b_ID'])
        try:
            length = nx.shortest_path_length(g, row['text_a_ID'], row['text_b_ID'], weight="weight")
            if length != 0:
                sps = 1 / length
            else:
                sps = 0
        except nx.NetworkXNoPath:
            sps=0
        g.add_edge(row['text_a_ID'], row['text_b_ID'], weight=row["weight"])
        return sps

    def get_neighbors(qid):
        # cutoff must reach max_level, otherwise the deepest neighbor sets are always empty
        neighbors = nx.single_source_shortest_path_length(g, qid, cutoff=max_level)
        neighbors_df = pd.DataFrame(list(zip(neighbors.keys(), neighbors.values())), index=neighbors.keys(), columns=["qid", "n_level"])

        neighbors = []
        for i in range(1, max_level+1):
            neighbors.append(neighbors_df[neighbors_df.n_level==i].qid.values)
        return neighbors


    print("Build Graph...")
    nodes = pd.concat([train.text_a_ID, train.text_b_ID, test.text_a_ID,test.text_b_ID]).values
    edges = pd.concat([train[["text_a_ID", "text_b_ID", "weight"]], test[["text_a_ID", "text_b_ID", "weight"]]]).values

    g = nx.Graph()
    g.add_nodes_from(nodes)
    for e in edges:
        g.add_edge(int(e[0]), int(e[1]), weight=e[2])
    g.remove_edges_from(nx.selfloop_edges(g))

    print("Compute question specific features...")
    df_questions = pd.DataFrame(unique_question_ids, columns=["qid"])
    df_questions.index = df_questions.qid

    print("--> Compute k cores...")
    df_questions["k_core"] = 0
    for i in range(2,n_k_cores):
        print("\t--> core {}".format(i))
        k_core = nx.k_core(g, k=i).nodes()
        df_questions.loc[df_questions.qid.isin(k_core), "k_core"] = i

    print("--> Compute neighbors...")  
    neighbors = df_questions.qid.apply(get_neighbors)
    for i in range(1, max_level+1):
        df_questions["neighbors" + str(i)] = neighbors.apply(lambda x: set(x[i-1]))

    print("--> Compute question frequency...")  
    df_questions["frequency"] = all_question_ids.value_counts()

    print("--> Compute page rank...") 
    pageranks = nx.pagerank(g, weight='weight')
    df_questions["page_rank"] = df_questions.qid.apply(lambda qid: pageranks[qid])

    print("--> Compute closeness centrality...") 
    closeness_centrality = nx.closeness_centrality(g)
    df_questions["closeness_centrality"] = df_questions.qid.apply(lambda qid: closeness_centrality[qid])

    print("--> Compute clustering...") 
    clustering = nx.clustering(g, weight='weight')
    df_questions["clustering"] = df_questions.qid.apply(lambda qid: clustering[qid])
    
    print("--> Compute eigenvector centrality...") 
    eigenvector_centrality = nx.eigenvector_centrality(g, weight='weight')
    df_questions["eigenvector_centrality"] = df_questions.qid.apply(lambda qid: eigenvector_centrality[qid])


    def preprocess(df):
        df_features = pd.DataFrame(index=df.index)
        df_intermediate = pd.DataFrame(index=df.index)
        
        print("--> Compute shortest path shortness...")
        df_features["shortest_path_shortness"] = df.apply(lambda x: shortestPathShortness(x), axis=1)
            
        print("--> Compute frequency features...")
        df_intermediate["freq_a"] = df_questions.loc[df.text_a_ID, "frequency"].values
        df_intermediate["freq_b"] = df_questions.loc[df.text_b_ID, "frequency"].values

        df_features["frequency_min"] = df_intermediate[["freq_a", "freq_b"]].min(axis=1).apply(lambda x: min(x,max_freq))
        df_features["frequency_max"] = df_intermediate[["freq_a", "freq_b"]].max(axis=1).apply(lambda x: min(x,max_freq))

        print("--> Compute neighbor features...")
        for i in range(1, max_level+1):
            df_intermediate["neighbors_a"] = df_questions.loc[df.text_a_ID, "neighbors" + str(i)].values
            df_intermediate["neighbors_b"] = df_questions.loc[df.text_b_ID, "neighbors" + str(i)].values
            df_intermediate["common_neighbors"] = df_intermediate.apply(lambda x: len(list(x.neighbors_a.intersection(x.neighbors_b))), axis=1)

            df_features["common_neighbors" + str(i)] = df_intermediate["common_neighbors"].apply(lambda x: min(x,max_neighbors)).apply(lambda x: min(x,max_neighbors))
            df_intermediate["min_neighbors"] = df_intermediate[["neighbors_a","neighbors_b"]].apply(lambda x: min(len(x.neighbors_a), len(x.neighbors_b)),axis=1)
            df_features["common_neighbors_ratio" + str(i)] = df_features["common_neighbors" + str(i)]/(df_intermediate["min_neighbors"] + 0.00001)

        print("--> Compute k-core features...")
        df_intermediate["k_core_a"] = df_questions.loc[df.text_a_ID, "k_core"].values
        df_intermediate["k_core_b"] = df_questions.loc[df.text_b_ID, "k_core"].values

        df_features["k_core_min".format(i)] = df_intermediate[["k_core_a", "k_core_b"]].min(axis=1)
        df_features["k_core_max".format(i)] = df_intermediate[["k_core_a", "k_core_b"]].max(axis=1)
        
        print("--> Compute page rank features...")
        df_intermediate["page_rank_a"] = df_questions.loc[df.text_a_ID, "page_rank"].values
        df_intermediate["page_rank_b"] = df_questions.loc[df.text_b_ID, "page_rank"].values

        df_features["page_rank_min"] = df_intermediate[["page_rank_a", "page_rank_b"]].min(axis=1).apply(lambda x: min(x,100))
        df_features["page_rank_max"] = df_intermediate[["page_rank_a", "page_rank_b"]].max(axis=1).apply(lambda x: min(x,100))
        
        print("--> Compute closeness centrality features...")
        df_intermediate["closeness_centrality_a"] = df_questions.loc[df.text_a_ID, "closeness_centrality"].values
        df_intermediate["closeness_centrality_b"] = df_questions.loc[df.text_b_ID, "closeness_centrality"].values

        df_features["closeness_centrality_min"] = df_intermediate[["closeness_centrality_a", "closeness_centrality_b"]].min(axis=1).apply(lambda x: min(x,100))
        df_features["closeness_centrality_max"] = df_intermediate[["closeness_centrality_a", "closeness_centrality_b"]].max(axis=1).apply(lambda x: min(x,100))
        
        print("--> Compute clustering features...")
        df_intermediate["clustering_a"] = df_questions.loc[df.text_a_ID, "clustering"].values
        df_intermediate["clustering_b"] = df_questions.loc[df.text_b_ID, "clustering"].values

        df_features["clustering_min"] = df_intermediate[["clustering_a", "clustering_b"]].min(axis=1).apply(lambda x: min(x,100))
        df_features["clustering_max"] = df_intermediate[["clustering_a", "clustering_b"]].max(axis=1).apply(lambda x: min(x,100))
        
        print("--> Compute eigenvector centrality...")
        df_intermediate["eigenvector_centrality_a"] = df_questions.loc[df.text_a_ID, "eigenvector_centrality"].values
        df_intermediate["eigenvector_centrality_b"] = df_questions.loc[df.text_b_ID, "eigenvector_centrality"].values

        df_features["eigenvector_centrality_min"] = df_intermediate[["eigenvector_centrality_a", "eigenvector_centrality_b"]].min(axis=1).apply(lambda x: min(x,100))
        df_features["eigenvector_centrality_max"] = df_intermediate[["eigenvector_centrality_a", "eigenvector_centrality_b"]].max(axis=1).apply(lambda x: min(x,100))
        
        return df_features

    print("Compute train features...")
    train_features = preprocess(train)

    print("Compute test features...")
    test_features = preprocess(test)

    print("Store features...")
    train_features.to_csv("./data/graph_features_train.csv", index=False)
    test_features.to_csv("./data/graph_features_test.csv", index=False)
Example n. 50
def calculateEigenvector(graph):
    centrality = nx.eigenvector_centrality(graph, weight='weight')
    return centrality
 def test_maxiter(self):
     with pytest.raises(nx.PowerIterationFailedConvergence):
         G = nx.path_graph(3)
         b = nx.eigenvector_centrality(G, max_iter=0)
Example n. 52
    #%% Write out clustering results
    print('Writing out clustering results ' + str(datetime.now()))
    clustering_results_d = {
        'nodes': G_gn.nodes(),
        'clusters': cluster_labels,
        'uids': [nx.get_node_attributes(G_gn, 'uid')[n] for n in G_gn.nodes()]
    }
    clustering_results = pd.DataFrame(clustering_results_d)
    clustering_results['nodeDegree'] = clustering_results['nodes'].apply(
        lambda x: G_gn.degree(x))
    clustering_results['frequency'] = clustering_results['uids'].apply(
        lambda x: len(x))
    nodeDegreeCentrality = nx.degree_centrality(G_gn)
    nodeBetweennessCentrality = nx.betweenness_centrality(G_gn)
    nodeLoadCentrality = nx.load_centrality(G_gn)
    nodeEigenvectorCentrality = nx.eigenvector_centrality(G_gn)
    clustering_results['nodeDegreeCentrality'] = clustering_results[
        'nodes'].apply(lambda x: nodeDegreeCentrality[x])
    clustering_results['nodeBetweennessCentrality'] = clustering_results[
        'nodes'].apply(lambda x: nodeBetweennessCentrality[x])
    clustering_results['nodeLoadCentrality'] = clustering_results[
        'nodes'].apply(lambda x: nodeLoadCentrality[x])
    clustering_results['nodeEigenvectorCentrality'] = clustering_results[
        'nodes'].apply(lambda x: nodeEigenvectorCentrality[x])
    for k, v in moduleResultsDict.items():
        clustering_results['minClique=' +
                           str(k)] = clustering_results['nodes'].map(v)
    outpath = outputbasepath + basename + ' clustering results' + '.csv'
    clustering_results.to_csv(outpath, encoding='utf-8')

    #%%
Example n. 53
def eigenvector_centrality(g, weight=None):
    return sorted(nx.eigenvector_centrality(g, weight=weight).items(),
                  key=lambda item: item[1],
                  reverse=True)
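A hedged usage sketch for the helper above, on networkx's built-in karate club graph (an arbitrary choice for illustration, not from the source):

g = nx.karate_club_graph()            # small built-in social network
top5 = eigenvector_centrality(g)[:5]  # five (node, score) pairs with the highest scores
print(top5)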
import networkx as nx
import os
G = nx.Graph()    #Create an empty graph with no nodes and no edges.
file = os.path.join("data.txt")    #Load the data file
with open(file) as p:    #Try to open the data file
    next(p)    #ignore the first row of the dataset
    for line in p:    #iterate in the dataset
        s=line.split()    #Break the dataset into different columns
        G.add_edge(s[0],s[1],weight=int(s[2]))    #Add edges and weights from the dataset
        
eigen_vector_centrality = nx.eigenvector_centrality(G, max_iter=10000)    #Calculate the Eigen Vector centrality of the network which will return a dictionary 
with open("Updated_Eigen_Vector_Centrality_Output.txt","w") as f:    #Create a text file name Eigen_Vector_Centrality_Output 
    f.write("\t\t\t\t\t\t\t\t\t************************************\t\t\tEigen Vector Centrality Output\t\t\t************************************"+"\n")
     #Write a header title 
    for k,v in eigen_vector_centrality.items():   #Iterate over the dictionary
        f.write(str(k)+": "+str(v)+"\n")    #Write Dictionary keys and values in the file
Example n. 55
    tupl = sorted_Cent[ii]
    print tupl

clsCent = nx.closeness_centrality(G_original)
sorted_Cls = sorted(clsCent.items(), key=operator.itemgetter(1), reverse=True)
for ii in range(number):
    tupl = sorted_Cls[ii]
    print tupl

btwCent = nx.betweenness_centrality(G_original)
sorted_btw = sorted(btwCent.items(), key=operator.itemgetter(1), reverse=True)
for ii in range(number):
    tupl = sorted_btw[ii]
    print tupl

eigCent = nx.eigenvector_centrality(G_original)
sorted_eig = sorted(eigCent.items(), key=operator.itemgetter(1), reverse=True)
for ii in range(number):
    tupl = sorted_eig[ii]
    print tupl

katzCent = nx.katz_centrality_numpy(G_original)
sorted_katz = sorted(katzCent.items(),
                     key=operator.itemgetter(1),
                     reverse=True)
for ii in range(number):
    tupl = sorted_katz[ii]
    print tupl

nx.draw(G_original,
        nodelist=clsCent.keys(),
Example n. 56
        b_k1 = np.dot(A, b_k)
        # calculate the norm
        b_k1_norm = np.linalg.norm(b_k1)
        b_k_next = b_k1 / b_k1_norm
        if (np.sum(abs(b_k_next - b_k)) < EPSILON * len(b_k)):
            break
        b_k = b_k_next
        num_simulations -= 1
    print("left iters: ", num_simulations)
    return b_k_next


eigenvector_centrality = power_iteration(subG, num_simulations=30)

compare(list(zip(G.nodes, eigenvector_centrality)),
        list(nx.eigenvector_centrality(subG, weight='weight').items()),
        name="eigenvector")  #,size=subSize)

# In[26]:
"""
Clustering Coefficient 
The global clustering coefficient is the number of closed triplets (or 3 x triangles) 
over the total number of triplets (both open and closed). 

The local clustering coefficient is the proportion of links between the vertices 
within its neighbourhood divided by the number of links that could possibly exist between them.

Average clustering coefficient is mean of local clusterings
"""
unsubG = nx.to_undirected(subG)
clustering_coeffs = {}
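As a hedged illustration of the three quantities described in the docstring above (not part of the original example), on a small toy graph:

toy = nx.Graph([(0, 1), (1, 2), (2, 0), (2, 3)])  # one triangle plus a pendant edge
print(nx.transitivity(toy))          # global coefficient: closed triplets / all triplets -> 0.6
print(nx.clustering(toy))            # local coefficient per node
print(nx.average_clustering(toy))    # mean of the local coefficients -> ~0.583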
Example n. 57
deg = pd.Series(dict(nx.degree(G)))
cc = pd.Series({e: nx.clustering(F, e) for e in F})
deg_cc = pd.concat([deg, cc], axis=1)
deg_cc.columns = ("Degree", "CC")
deg_cc.groupby("Degree").mean().reset_index()\
    .plot(kind="scatter", x="Degree", y="CC", s=100)
plt.xscale("log")
plt.ylim(ymin=0)
plt.grid()
dzcnapy.plot("deg_cc")

# A study of centralities
dgr = nx.degree_centrality(G)
clo = nx.closeness_centrality(G)
har = nx.harmonic_centrality(G)
eig = nx.eigenvector_centrality(G)
bet = nx.betweenness_centrality(G)
pgr = nx.pagerank(G)
hits = nx.hits(G)

centralities = pd.concat(
    [pd.Series(c) for c in (hits[1], eig, pgr, har, clo, hits[0], dgr, bet)],
    axis=1)

centralities.columns = ("Authorities", "Eigenvector", "PageRank",
                        "Harmonic Closeness", "Closeness", "Hubs", "Degree",
                        "Betweenness")
centralities["Harmonic Closeness"] /= centralities.shape[0]

# Calculate the correlations for each pair of centralities
c_df = centralities.corr()
Example n. 58
#   Move the computed degree results into a pandas DataFrame for further work
'''
degrees = pd.DataFrame(list(g.degree()),
        columns=("country", "degree")).set_index("country")
print(degrees.sort_values("degree", ascending=False).head(10))
'''

#   Compute and print the clustering coefficients
'''
cc = nx.clustering(g)
for k in cc:
    print("%s  %s"%(k, cc[k]))
'''

#   Compute the list of connected components
'''
comps = list(nx.connected_components(g))
for k in comps:
    print(k)
'''

#   Print the computed centrality measures
print("North Korea : %.2f"%nx.degree_centrality(g)["North Korea"])
print(nx.closeness_centrality(g))
print(nx.betweenness_centrality(g))
print(nx.eigenvector_centrality(g))




def construct_network(portcalls: pd.DataFrame) -> nx.DiGraph:
    """
    Obtain network with all used node attributes stored in graph:
    - degree
    - in-degree
    - out-degree
    - strength
    - in-strength
    - out-strength
    - closeness centrality (weighted and unweighted)
    - betweenness centrality (weighted and unweighted)
    - eigenvector centrality (weighted and unweighted)

    These node measures can be obtained as follows:
    > pd.DataFrame.from_dict(
        dict(network_base.nodes(data=True)), orient='index')
    """
    assert 'port' in portcalls.columns
    assert 'arrival' in portcalls.columns
    assert 'departure' in portcalls.columns
    assert 'ship' in portcalls.columns
    assert all((portcalls['departure'] -
                portcalls['arrival']).dropna() > pd.Timedelta(0))

    objs = list()
    for _, ship_df in portcalls.groupby('ship'):
        if len(ship_df) < 2:
            continue  # a single portcall yields no edge (and would divide by zero below)
        duration = ship_df['arrival'] - ship_df['departure'].shift(1)
        assert all(duration.dropna() >= pd.Timedelta(0)), (print(
            ship_df['arrival'], ship_df['departure'].shift(1)), duration)
        obj = pd.DataFrame({
            'source': ship_df['port'].shift(1),
            'target': ship_df['port'],
            'duration': duration,
            'weight': len(ship_df) - 1,
            'distance': 1 / (len(ship_df) - 1),
        }).dropna()
        objs.append(obj)
    edgelist = pd.concat(objs)

    assert all(edgelist['duration'] >= pd.Timedelta(0))

    # Get graph
    G = nx.from_pandas_edgelist(edgelist,
                                edge_attr=True,
                                create_using=nx.DiGraph)
    nx.set_node_attributes(G, dict(G.degree), 'degree')
    nx.set_node_attributes(G, dict(G.in_degree), 'in_degree')
    nx.set_node_attributes(G, dict(G.out_degree), 'out_degree')
    nx.set_node_attributes(G, dict(G.degree(weight='weight')), 'strength')
    nx.set_node_attributes(G, dict(G.in_degree(weight='weight')),
                           'in_strength')
    nx.set_node_attributes(G, dict(G.out_degree(weight='weight')),
                           'out_strength')
    nx.set_node_attributes(G, nx.closeness_centrality(G, wf_improved=False),
                           'closeness')
    nx.set_node_attributes(G, nx.betweenness_centrality(G, normalized=False),
                           'betweenness')
    nx.set_node_attributes(G, nx.eigenvector_centrality(G, max_iter=100_000),
                           'eigenvector')
    nx.set_node_attributes(
        G, nx.closeness_centrality(G, distance='distance', wf_improved=False),
        'closeness_weighted')
    nx.set_node_attributes(
        G, nx.betweenness_centrality(G, weight='weight', normalized=False),
        'betweenness_weighted')
    nx.set_node_attributes(
        G, nx.eigenvector_centrality(G, weight='weight', max_iter=100_000),
        'eigenvector_weighted')
    return G
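A hedged usage sketch for construct_network (the toy data below is an assumption for illustration, not from the source): two ships sail the same three-port route in opposite directions, which satisfies the input asserts and yields a digraph on which eigenvector centrality converges.

import pandas as pd

portcalls = pd.DataFrame({
    'ship': ['A', 'A', 'A', 'B', 'B', 'B'],
    'port': ['P1', 'P2', 'P3', 'P3', 'P2', 'P1'],
    'arrival': pd.to_datetime(['2020-01-01 00:00', '2020-01-02 00:00', '2020-01-03 00:00',
                               '2020-01-01 00:00', '2020-01-02 00:00', '2020-01-03 00:00']),
    'departure': pd.to_datetime(['2020-01-01 12:00', '2020-01-02 12:00', '2020-01-03 12:00',
                                 '2020-01-01 12:00', '2020-01-02 12:00', '2020-01-03 12:00']),
})

network = construct_network(portcalls)
# Node measures as a DataFrame, as suggested in the docstring
measures = pd.DataFrame.from_dict(dict(network.nodes(data=True)), orient='index')
print(measures[['degree', 'strength', 'eigenvector']])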
def buildSimpleNetwork(df):
    def mapp(dic, name):
        for key, value in dic.items():
            ddf[key][name] = value
                
    ddf={}
    
    G=nx.DiGraph()
    GI=nx.DiGraph()
    nodes=df.hdistrict_id.unique()
    for n in nodes:
        ddf[n]={}
    names=[]
    dics=[]
        
    G.add_nodes_from(nodes)
    for index,row in df.iterrows():
        G.add_edge(row.hdistrict_id, row.sdistrict_id, weight=row.counts, distance=1.0 / row.counts)
        GI.add_edge(row.sdistrict_id, row.hdistrict_id, weight=row.counts, distance=1.0 / row.counts)
    dics.append(nx.eigenvector_centrality(G,weight='weight'))
    names.append('eigen')
    
    dics.append(nx.eigenvector_centrality(GI,weight='weight'))
    names.append('righteigen')
    
    dics.append(nx.in_degree_centrality(G))
    names.append('indegree')
    
    dics.append(nx.out_degree_centrality(G))
    names.append('outdegree')
    
    dics.append(nx.closeness_centrality(G,distance='distance'))
    names.append('closeness')
    dics.append(nx.betweenness_centrality(G,weight='weight'))
    names.append('betweeness')
        
    def get(chunk):
        return chunk.counts.sum()
    dics.append(df.groupby('hdistrict_id').apply(get).to_dict())
    names.append('outgoing')
    dics.append(df.groupby('sdistrict_id').apply(get).to_dict())
    names.append('incoming')
    
    dic={}
    for n in nodes:
        neigh = [v for v in G.neighbors(n) if v != n]  # neighbour list without self-loops
        N=len(neigh)
        s=0.0
        for i in range(len(neigh)):
            for j in range(i+1,len(neigh)):
                s += G.has_edge(neigh[i], neigh[j])
                s += G.has_edge(neigh[j], neigh[i])
        dic[n] = s / (N * (N - 1)) if N > 1 else 0.0
    dics.append(dic)
    names.append('clustering')
    
    def check(row):
        return row.hdistrict_id==row.sdistrict_id
    df['check']=df.apply(check,1) 
    df2=df[df.check==False]
    dics.append(df2.groupby('hdistrict_id').apply(get).to_dict())
    names.append('outgoing_noself')
    dics.append(df2.groupby('sdistrict_id').apply(get).to_dict())
    names.append('incoming_noself')   
    for i in range(len(dics)):
        mapp(dics[i],names[i])
    ddf=pd.DataFrame.from_dict(ddf,orient='index') 
    ddf['district_id']=ddf.index
    return ddf