Example #1
    def testSparseAdjacency(self):
        mat = sparse.coo_matrix(
            [[0, 1, 1, 0], [1, 0, 0, 0], [0, 0, 2, 0], [0, 1, 0, 0]],
        )

        # ADJ_DIRECTED (default)
        g = Graph.Adjacency(mat)
        el = g.get_edgelist()
        self.assertTrue(g.is_directed())
        self.assertEqual(4, g.vcount())
        self.assertTrue(el == [(0, 1), (0, 2), (1, 0), (2, 2), (2, 2), (3, 1)])

        # ADJ MIN
        g = Graph.Adjacency(mat, mode="min")
        el = g.get_edgelist()
        self.assertFalse(g.is_directed())
        self.assertEqual(4, g.vcount())
        self.assertTrue(el == [(0, 1), (2, 2), (2, 2)])

        # ADJ LOWER
        g = Graph.Adjacency(mat, mode="lower")
        el = g.get_edgelist()
        self.assertFalse(g.is_directed())
        self.assertEqual(4, g.vcount())
        self.assertTrue(el == [(0, 1), (2, 2), (2, 2), (1, 3)])
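For orientation, a minimal standalone sketch of the remaining symmetrization modes on the same matrix; it assumes a python-igraph version that accepts string mode names and expands diagonal values into loop edges the way the tests above expect:

from igraph import Graph

mat = [[0, 1, 1, 0], [1, 0, 0, 0], [0, 0, 2, 0], [0, 1, 0, 0]]

# "max" keeps an undirected edge wherever either mirrored entry is non-zero;
# "upper" reads only the upper triangle (including the diagonal).
g_max = Graph.Adjacency(mat, mode="max")
g_upper = Graph.Adjacency(mat, mode="upper")

print(g_max.get_edgelist())    # typically [(0, 1), (0, 2), (1, 3), (2, 2), (2, 2)]
print(g_upper.get_edgelist())  # typically [(0, 1), (0, 2), (2, 2), (2, 2)]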
Example #2
import re

import numpy as np
from igraph import ADJ_UNDIRECTED, Graph


def read_dimacs(filename: str, reverse=False):
    matrix = None
    with open(filename) as fp:
        for line in fp:
            edge_match = re.search(r'^e (\d+) (\d+)', line)
            problem_match = re.search(r'^p edge (\d+)', line)
            if edge_match:
                source = int(edge_match.group(1))  # DIMACS vertices are 1-indexed
                dest = int(edge_match.group(2))
                matrix[source - 1, dest - 1] = 1
                matrix[dest - 1, source - 1] = 1
            elif re.search(r'^c ', line):
                pass  # comment line
            elif problem_match:
                vertices_num = int(problem_match.group(1))
                matrix = np.zeros((vertices_num, vertices_num), dtype=np.byte)

    if reverse:
        # complement graph: connect every pair that is *not* connected, no self-loops
        reverse_matrix = np.ones_like(matrix) - matrix

        for i in range(len(reverse_matrix)):
            reverse_matrix[i, i] = 0

        matrix = reverse_matrix

    graph: Graph = Graph.Adjacency(matrix.tolist(), mode=ADJ_UNDIRECTED)
    return graph, matrix
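A short usage sketch for read_dimacs; the file name is hypothetical and stands for any DIMACS file containing "p edge" and "e" lines:

# "myciel3.col" is a placeholder for an existing DIMACS file on disk
graph, adjacency = read_dimacs("myciel3.col")
complement, complement_adjacency = read_dimacs("myciel3.col", reverse=True)

print(graph.vcount(), graph.ecount())
print(complement.vcount(), complement.ecount())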
Example #3
    def testAdjacencyNumPy(self):
        mat = np.array(
            [[0, 1, 1, 0], [1, 0, 0, 0], [0, 0, 2, 0], [0, 1, 0, 0]], )

        # ADJ_DIRECTED (default)
        g = Graph.Adjacency(mat)
        el = g.get_edgelist()
        self.assertTrue(el == [(0, 1), (0, 2), (1, 0), (2, 2), (2, 2), (3, 1)])

        # ADJ MIN
        g = Graph.Adjacency(mat, mode="min")
        el = g.get_edgelist()
        self.assertTrue(el == [(0, 1), (2, 2), (2, 2)])

        # ADJ LOWER
        g = Graph.Adjacency(mat, mode="lower")
        el = g.get_edgelist()
        self.assertTrue(el == [(0, 1), (2, 2), (2, 2), (1, 3)])
Example #4
def igraphCluster(allInterests, simMatrix):
    g = Graph.Adjacency(simMatrix, ADJ_LOWER)
    for i, v in enumerate(g.vs):
        v['interest_id'] = allInterests[i]

    # community_fastgreedy returns a dendrogram; cut it at the
    # maximum-modularity level to obtain the clustering
    cl = g.community_fastgreedy().as_clustering()

    return [extractVertexIds(cl.subgraph(i)) for i in range(len(cl.sizes()))]
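A rough usage sketch, assuming igraphCluster lives in a module where ADJ_LOWER is imported from igraph and where extractVertexIds collects the interest_id values of a subgraph's vertices; both stand-ins below are illustrative:

from igraph import ADJ_LOWER, Graph

def extractVertexIds(subgraph):
    # stand-in for the project's helper: return the ids stored on the vertices
    return [v['interest_id'] for v in subgraph.vs]

# symmetric 0/1 similarity matrix for four interests (only the lower triangle is read)
simMatrix = [
    [0, 0, 0, 0],
    [1, 0, 0, 0],
    [0, 0, 0, 0],
    [0, 0, 1, 0],
]
allInterests = ['cooking', 'baking', 'running', 'cycling']

print(igraphCluster(allInterests, simMatrix))  # e.g. [['cooking', 'baking'], ['running', 'cycling']]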
Example #5
def _floyd_transform(gg):
    # TODO: Beautify.
    g_floyd_am = gg.shortest_paths_dijkstra()
    g_floyd_am = np.asarray(g_floyd_am).reshape(len(g_floyd_am),
                                                len(g_floyd_am))
    g = Graph.Adjacency((g_floyd_am > 0).tolist())
    g.es['label'] = g_floyd_am[g_floyd_am.nonzero()]
    g.vs['id'] = np.arange(len(gg.vs['label']))
    g.vs['label'] = gg.vs['label']
    return g
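A small sketch of how the transform might be called; the toy graph and its 'label' attribute are made up for illustration:

from igraph import Graph

# toy input: a path graph with integer vertex labels
gg = Graph(edges=[(0, 1), (1, 2), (2, 3)])
gg.vs['label'] = [0, 1, 2, 3]

floyd_graph = _floyd_transform(gg)
print(floyd_graph.es['label'])  # pairwise shortest-path lengths stored on the edges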
Example #6
def adaptative_nearest_network(phase_space, E0=5):
	"""
--------------------------------------------
Convert a phase space into an adaptive nearest neighbor network, an undirected one
--------------------------------------------
phase_space:	Array. The phase space representation in numpy array format
E0:				Number.  The E0 nearest neighbors will be connected according to specification
--------------------------------------------
Return a graph object, using igraph representation
See article for details of the implementation
	Recurrence-Based Time Series Analysis by means of Complex Network Methods, 2011
	Donner, R.; Small, M.; Donges, J.; Marwan, N.; Zou, Y.; Xiang, R.; Kurths, J.
	International Journal of Bifurcation and Chaos, Vol. 21 No. 4 (2011)
--------------------------------------------
Usage example:

import numpy as np
import imp

from ts2cn.ts import phase_space as phs

filename='lorenz.dat'
file = open('ts2cn/thirdy_parties/minfo/data/'+filename, 'r')
ts = file.read().split()
ts = [float(i) for i in ts]

rc = phs.reconstruct_ps(ts, max_dim=20, dims_step=5, false_nn_threshold=0.2, noise_perc=2)

graph = phs.adaptative_nearest_network(rc, E0=5)

	"""
	from scipy.spatial.distance import pdist, squareform
	from igraph import Graph
	from igraph import ADJ_UNDIRECTED, ADJ_DIRECTED
	from sklearn.neighbors import NearestNeighbors

	# TODO allow other algorithms
	# E0+1 is passed because each node is considered its own nearest neighbor
	nbrs = NearestNeighbors(n_neighbors=E0+1, algorithm='kd_tree').fit(phase_space)
	
	adj_mat = nbrs.kneighbors_graph(phase_space, mode='connectivity')
	
	diag = range(adj_mat.shape[0])
	adj_mat[diag, diag] = 0
	
	# adaptation of edges
	for i in diag:
		k_offset = adj_mat[0:i, i].sum() +1
		# discard the first neighbor, itself
		new_neighbors = nbrs.kneighbors(X=[phase_space[i]], n_neighbors=k_offset, return_distance=False)[0][1:]
		adj_mat[i, new_neighbors] = 1
	
	return Graph.Adjacency(adj_mat.toarray().tolist(), mode=ADJ_UNDIRECTED)
Example #7
    def create_graph(self, filename):
        '''Creates a graph from adjacency matrix'''

        df = pd.read_csv(filename, sep=',', header=None, skiprows=1)
        df = df.drop(df.columns[0], axis=1)

        A = np.asarray(df.values)
        self.graph = Graph.Adjacency(A.tolist())
        self.graph = self.graph.as_undirected()

        self.order = self.graph.vcount()
        self.n = self.order
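The expected input is a CSV adjacency matrix with a header row and a leading index column; a stripped-down sketch of the same conversion outside the class (the file name is hypothetical):

import pandas as pd
from igraph import Graph

# adjacency.csv (hypothetical file):
# ,a,b,c
# a,0,1,0
# b,1,0,1
# c,0,1,0
df = pd.read_csv('adjacency.csv', sep=',', header=None, skiprows=1)
df = df.drop(df.columns[0], axis=1)

graph = Graph.Adjacency(df.values.tolist()).as_undirected()
print(graph.vcount(), graph.ecount())  # 3 2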
Example #8
def CalculateShortestPathKernel(G):
    G_floyd = []
    for i in range(len(G)):
        g_floyd_am = G[i].shortest_paths_dijkstra()
        g_floyd_am = np.asarray(g_floyd_am).reshape(len(g_floyd_am),
                                                    len(g_floyd_am))
        g = Graph.Adjacency((g_floyd_am > 0).tolist())
        g.es['label'] = g_floyd_am[g_floyd_am.nonzero()]
        g.vs['id'] = np.arange(len(G[i].vs['label']))
        g.vs['label'] = G[i].vs['label']
        G_floyd.append(g)

    G_floyd = np.array(G_floyd)

    K = CalculateKStepRandomWalkKernel(G_floyd, par=(0, 1))
    return K
Example #9
def epsilon_recurrence_network(phase_space, epsilon=0.1, dist_vec=None):
	"""
--------------------------------------------
Convert a phase space into an epsilon recurrence network, an undirected one
--------------------------------------------
phase_space:	Array. The phase space representation in numpy array format
epsilon:		Number or Dict. The distance threshold; distance values below this mean the two
				points should be connected. If a dict is passed instead, it should have the key 'percentile'
				with the corresponding percentile to compute.
dist_vec:		Array. The distance matrix between points; if not passed it will be calculated.
				It can be used to save computation when the distances are already computed
--------------------------------------------
Return a graph object, using igraph representation
--------------------------------------------
Usage example:

import numpy as np
import imp

from ts2cn.ts import phase_space as phs

filename='lorenz.dat'
file = open('ts2cn/thirdy_parties/minfo/data/'+filename, 'r')
ts = file.read().split()
ts = [float(i) for i in ts]

rc = phs.reconstruct_ps(ts, max_dim=20, dims_step=5, false_nn_threshold=0.2, noise_perc=2)

graph = phs.epsilon_recurrence_network(rc, {'percentile': 25})
	"""
	from scipy.spatial.distance import pdist, squareform
	from igraph import Graph
	from igraph import ADJ_UNDIRECTED
	
	# TODO allow other distance metrics
	if dist_vec is None:
		dist_vec = pdist(X=phase_space, metric='euclidean')

	# if epsilon is to be chosen automatically, use the value corresponding to
	# the requested percentile of the distances
	if isinstance(epsilon, dict):
		epsilon = np.percentile(dist_vec, epsilon['percentile'])

	adj_mat = squareform(dist_vec < epsilon)
	return Graph.Adjacency(adj_mat.tolist(), mode=ADJ_UNDIRECTED)
Example #10
def k_nearest_network(phase_space, k=5):
	"""
--------------------------------------------
Convert a phase space into a k nearest neighbor network, a directed one
--------------------------------------------
phase_space:	Array. The phase space representation in numpy array format
k:				Number. The k nearest neighbors will be connected
--------------------------------------------
Return a graph object, using igraph representation
--------------------------------------------
Usage example:

import numpy as np
import imp

from ts2cn.ts import phase_space as phs

filename='lorenz.dat'
file = open('ts2cn/thirdy_parties/minfo/data/'+filename, 'r')
ts = file.read().split()
ts = [float(i) for i in ts]

rc = phs.reconstruct_ps(ts, max_dim=20, dims_step=5, false_nn_threshold=0.2, noise_perc=2)

graph = phs.k_nearest_network(rc, k=5)

	"""
	from scipy.spatial.distance import pdist, squareform
	from igraph import Graph
	from igraph import ADJ_UNDIRECTED, ADJ_DIRECTED
	from sklearn.neighbors import NearestNeighbors

	# TODO allow other algorithms
	# k+1 is passed because each node is considered its own nearest neighbor
	nbrs = NearestNeighbors(n_neighbors=k+1, algorithm='kd_tree').fit(phase_space)
	
	adj_mat = nbrs.kneighbors_graph(phase_space, mode='connectivity').toarray()
	diag = range(len(adj_mat))
	adj_mat[diag, diag] = 0
	return Graph.Adjacency(adj_mat.tolist(), mode=ADJ_DIRECTED)
Example #11
def do_enumeration():
    dim = 5

    eigvals = []
    #  List of measures.
    #  The dict key "func" should be a function that takes a graph and returns a measure result.
    measures = [{"func": lambda graph: Graph.average_path_length(graph, unconn=False), "result": [], "name": "Average Path Length"},
                {"func": lambda graph: sum(Graph.degree(graph))/len(Graph.degree(graph)), "result": [], "name": "Average Degree"},
                # {"func": lambda graph: max(Graph.degree(graph)), "result" : [], "name" : "Maximum Degree"},
                {"func": lambda graph: sum(Graph.betweenness(graph))/len(Graph.betweenness(graph)), "result": [], "name": "Average Betweenness"},
                # {"func": lambda graph: max(Graph.betweenness(graph)), "result" : [], "name" : "Maximum Betweenness"},
                {"func": lambda graph: len(Graph.cliques(graph)), "result": [], "name": "Number of Cliques"},
                # {"func": lambda graph: Graph.clique_number(graph), "result" : [], "name" : "Largest Clique"},
                {"func": lambda graph: Graph.transitivity_undirected(graph, mode="zero"), "result": [], "name": "Global Clustering Coefficient"},
                # {"func": lambda graph: Graph.transitivity_avglocal_undirected(graph, mode="nan"), "result" : [], "name" : "Average Local Clustering Coefficient"},
                {"func": lambda graph: shieldvalue(list(range(0,dim)), graph), "result": [], "name": "Shield Value"}]

    for i in EnumerateAdjacency(dim):
        upper = np.array(i)
        adj = np.maximum(upper, np.transpose(upper))
        eigv = np.linalg.eigvals(adj)
        x = np.real(max(eigv))
        graph = Graph.Adjacency(adj.tolist(), igraph.ADJ_UNDIRECTED)
        eigvals.append(x)
        for measure in measures:
            measure["result"].append(measure["func"](graph))

    for measure in measures:
        (pearson, _) = scipy.stats.pearsonr(measure["result"], eigvals)
        print(measure["name"], pearson)
        plt.plot(measure["result"], eigvals, 'o')
        plt.ylabel("Max eigen value")
        plt.xlabel(measure["name"])
        plt.title(measure["name"])
        plt.savefig(measure["name"] + ".png", dpi=200)
        plt.clf()
Example #12
import random

import numpy as np
from scipy.io import mmread
from sklearn.neighbors import NearestNeighbors, LSHForest

from igraph import Graph, EdgeSeq
from timeit import timeit

random.seed(100)

# robjects.r['load']('../processed_sub_Data.RData')
print("Reading sparse matrix...")
matrix = mmread("sub_matrix")
print("Converting matrix to dense format...")
a = np.array(matrix.todense())
print(a.shape)
print("Initialize LSH...")
lshf = LSHForest(n_neighbors=10, random_state=1, n_estimators=10)
print("Fit LSH...")
lshf.fit(a)

K = lshf.kneighbors_graph(a)

print("Convert into adjacency matrix...")
K = K.toarray()

g = Graph.Adjacency(K.tolist())
es = EdgeSeq(g)

print("Writing graph edgelist...")
g.write_edgelist("src_dst_lsh.csv")
Example #13
    def AdjacencyMatrix(file: str,
                        sep: str or None = None,
                        header: bool = True) -> Graph:
        r"""
        Imports an adjacency matrix file to a :py:class:`igraph.Graph` object ready to be used by Pyntacle.

        For more information on adjacency matrices we refer the user to the `File Formats Guide <http://pyntacle.css-mendel.it/resources/file_formats/file_formats.html#adjm>`_
        on Pyntacle website.

        .. note:: We support unweighted undirected Adjacency Matrices, so only zeroes and ones are allowed in the input file.

        .. note:: If a header is present, it **must** contain unique names (two nodes can't have the same ID); if not, an error will be raised. The names of the nodes will be assigned to the vertex ``name`` attribute. If the header is not present, the vertex ``name`` attribute will be the corresponding sequential index assigned by igraph.

        :param str file: the path to the file storing the adjacency matrix
        :param None,str sep: The field separator inside the network file. If :py:class:`None` (default), it will be guessed. Otherwise, pass the string representing the column separator.
        :param bool header: Whether the header is present or not (default is ``True``)

        :return igraph.Graph: an iGraph.Graph object compliant with Pyntacle `Minimum Requirements <http://pyntacle.css-mendel.it/requirements.html>`_

        :raise WrongArgumentError: if ``sep`` is not found in the adjacency matrix
        :raise ValueError: if the matrix is not squared
        """

        if not AdjmUtils(file=file, header=header, sep=sep).is_squared():
            raise ValueError(u"Matrix is not squared")

        with open(file, "r") as adjmatrix:
            iterator = iter(adjmatrix.readline, '')

            first_line = next(iterator, None).strip()
            if sep is not None and sep not in first_line:
                raise WrongArgumentError(
                    u'The specified separator "{}" is not present in the adjacency matrix file'
                    .format(sep))

            if header:
                # use pandas to parse the matrix
                f = pd.read_csv(filepath_or_buffer=file, sep=sep, index_col=0)
                f.index = f.index.map(
                    str
                )  # force index to string, in case it is not identified properly
                f = f.reindex(sorted(f.columns),
                              axis=1)  # sort columns alphabetically
                f = f.reindex(sorted(f.index),
                              axis=0)  # sort indices alphabetically
                node_names = f.columns.values.tolist()

            else:
                f = pd.read_csv(filepath_or_buffer=file, sep=sep, header=None)
                node_names = list(map(str, f.index))

            graph = Graph.Adjacency(f.values.tolist(), mode="UPPER")
            util = gu(graph=graph)
            util.graph_initializer(graph_name=os.path.splitext(
                os.path.basename(file))[0],
                                   node_names=node_names)
            graph = util.get_graph()

            sys.stdout.write(u"Adjacency matrix from {} imported\n".format(
                os.path.basename(file)))
            return graph
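For comparison, a stripped-down sketch of the same import path without the Pyntacle-specific initialization; the file name and tab separator are assumptions:

import pandas as pd
from igraph import Graph

# header row and index column hold the node names; values must be 0/1
f = pd.read_csv("adjacency_matrix.tsv", sep="\t", index_col=0)
f = f.reindex(sorted(f.columns), axis=1)  # sort columns alphabetically
f = f.reindex(sorted(f.index), axis=0)    # sort rows alphabetically
node_names = f.columns.values.tolist()

graph = Graph.Adjacency(f.values.tolist(), mode="UPPER")
graph.vs["name"] = node_names
print(graph.summary())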
Example #14
import csv
from igraph import Graph
from sys import argv
from os import path
from scipy.io import mmread

file_name = None

try:
    file_name, file_extension = path.splitext(argv[1])
except IndexError:
    print('Invalid filename. Usage:\npython mtx-to-point-cloud.py filename')
    exit(1)

matrix = mmread(file_name)
graph = Graph.Adjacency(matrix.toarray().tolist())
layout = graph.layout_kamada_kawai_3d()

with open('%s.txt' % file_name, 'w') as f:
    csv.writer(f, delimiter=' ').writerows(layout.coords)
Example #15
for _ in range(10):
    shuffled_data = data[:]
    np.random.shuffle(shuffled_data)
    for row in shuffled_data:
        np.random.shuffle(row)
        row = list(row)
    list_data = []
    for rid, row in enumerate(shuffled_data):
        list_row = []
        for iid, i in enumerate(row):
            if rid == iid:
                list_row.append(0)
            else:
                list_row.append(int(i))
        list_data.append(list_row)
    rand_graph = Graph.Adjacency(list_data, mode="MAX")
    ave_clu.append(rand_graph.transitivity_undirected())
p_value = st.ttest_1samp(ave_clu, glo_clu)[1]
if p_value < 0.05:
    print("The clustering is statistically significantly different from random"
          " clustering with a p-value of {}".format(p_value))
else:
    print("The clustering is statistically not significantly different from "
          "random clustering with a p-value of {}".format(p_value))

rewiring_probability = np.logspace(-3., -1., 10)
closeness_centrality = np.zeros_like(rewiring_probability)
g = igraph.Graph.Lattice([1, 50],
                         nei=8,
                         directed=False,
                         mutual=True,
Example #16
def check(sol, paths):
	graph = Graph.Adjacency(sol)
	graph = graph.as_directed()
	if len(paths_from_to(graph, 0, len(sol)-1)) != paths:
		print("ERROR")
Example #17
def cycle_network(ts, cycle_ths=0.3):
	"""
--------------------------------------------
Convert a time series into a cycle network
--------------------------------------------
ts:			List. The time series
cycle_ths:	Number. The threshold of proximity between cycles
--------------------------------------------
Return a graph object, using the igraph representation
Currently the cycles are defined as the time series segments between peaks, and
the proximity measure between cycles is the Pearson correlation

See article for details of the implementation
	Recurrence-Based Time Series Analysis by means of Complex Network Methods, 2011
	Donner, R.; Small, M.; Donges, J.; Marwan, N.; Zou, Y.; Xiang, R.; Kurths, J.
	International Journal of Bifurcation and Chaos, Vol. 21 No. 4 (2011)
--------------------------------------------
Usage example:

import numpy as np
import imp

from ts2cn.ts import cycle 

filename='lorenz.dat'
file = open('ts2cn/thirdy_parties/minfo/data/'+filename, 'r')
ts = file.read().split()
ts = [float(i) for i in ts]

graph = cycle.cycle_network(ts, cycle_ths=0.3)
	"""
	from scipy.spatial.distance import squareform, pdist
	from igraph import Graph, ADJ_UNDIRECTED

	from time import time

	t = time()
	peaks = get_peaks(ts)
	cycles = np.array([ts[peaks[i-1]:peaks[i]] for i in range(1, len(peaks))])
	print(('get cycles', time()-t))


	t = time()
	# still has problems with type conversions
#	from ts2cn.ts.cycle_opt import cycle_opt
#	corr = cycle_opt.correlation(cycles, cycle_ths)


	# this is the one I will leave
#	corr = np.array([proximity(cycles[i], cycles[j]) for i in range(len(cycles)) for j in range(i+1, len(cycles))])

#	corr = pdist(cycles, metric=proximity)
#	corr = cycle_corr.corr(cycles)
#	corr = cycle_cython.corr(cycles)
	
	# this code has the best performance
	# Try this code as cython
	from scipy.stats import pearsonr
	#corr = []
	corr = [0] * int((len(cycles)*(len(cycles)-1))/2)
	l = 0
	for i in range(len(cycles)):
		for j in range(i+1, len(cycles)):
			# ensure `bigger` holds the longer cycle and `smaller` the shorter one
			bigger = cycles[i]
			smaller = cycles[j]
			if len(smaller) > len(bigger):
				bigger, smaller = smaller, bigger
			bg_size = len(bigger)
			sm_size = len(smaller)
			
			#connect = 0
			for k in range(bg_size - sm_size):
				r =  pearsonr(smaller, bigger[k:k+sm_size])[0]
				if r > cycle_ths:
					#connect = 1
					corr[l] = 1
					break
			l += 1
			#corr.append(connect)

	
	print(time()-t)
	
	adj_mat = squareform(corr)
	
#	adj_mat = squareform(corr > cycle_ths)
	return Graph.Adjacency(adj_mat.tolist(), mode=ADJ_UNDIRECTED)
Example #18
def correlation_network(phase_space, rho_ths=0.2):
	"""
--------------------------------------------
Convert a phase space into a correlation network, an undirected one
--------------------------------------------
phase_space:	Array. The phase space representation in numpy array format
rho_ths:		Number. The rho (greek letter used for correlation) threshold.
				Points with correlation above this value will be connected.
				This varies between -1 and +1
--------------------------------------------
Return a graph object, using igraph representation
See article for details of the implementation
	Recurrence-Based Time Series Analysis by means of Complex Network Methods, 2011
	Donner, R.; Small, M.; Donges, J.; Marwan, N.; Zou, Y.; Xiang, R.; Kurths, J.
	International Journal of Bifurcation and Chaos, Vol. 21 No. 4 (2011)
--------------------------------------------
Usage example:

import numpy as np
import imp

from ts2cn.ts import phase_space as phs

filename='lorenz.dat'
file = open('ts2cn/thirdy_parties/minfo/data/'+filename, 'r')
ts = file.read().split()
ts = [float(i) for i in ts]

rc = phs.reconstruct_ps(ts, max_dim=20, dims_step=5, false_nn_threshold=0.2, noise_perc=2)

graph = phs.correlation_network(rc, rho_ths=0.2)

	"""
	from scipy.stats import pearsonr as r
	from scipy.spatial.distance import squareform, pdist
	from igraph import Graph, ADJ_UNDIRECTED

#	from time import time
#	t=time()

	# Guarantee that this approach is equivalent to the previous one

	# the correlation distance is defined as 1 - corr(x, y), so to recover the
	# correlation we subtract one and multiply by -1
	corr = (pdist(phase_space, metric='correlation') - 1) * -1
#	corr = pdist(phase_space, metric=lambda x, y: r(x,y)[0] )
	#corr = [r(phase_space[i], phase_space[j])[0] for i in range(len(phase_space)) 
	#												for j in range(i+1, len(phase_space))]
#	corr = [True if r(phase_space[i], phase_space[j])[0] >= rho_ths else False for i in range(len(phase_space)) 
#													for j in range(i+1, len(phase_space))]
#	print(('corr', time()-t))
#	t=time()
	corr_ths = np.array(corr) >= rho_ths
#	corr_ths = corr
#	print(('corr_ths', time()-t))
#	t=time()
	adj_mat = squareform( corr_ths )
#	print(('square', time()-t))
	
#	adj_mat = squareform(np.array([r(phase_space[i], phase_space[j])[0] for i in range(len(phase_space)) 
#													for j in range(i, len(phase_space))]) >= rho_ths )
	
	'''
	# code calling R to compute the pearson correlation
	import os, rpy2.robjects as ro
	# first save phase_space vector to a temporary file
	tmp_file = '/tmp/tt'
	sep = ';'
	size = 100
	phase_space[:size].tofile(tmp_file, sep=sep)
	r_script = """
	mat = matrix(as.numeric(read.table(file="%s", sep="%s")), nrow=%d, ncol=%d, byrow=TRUE)
	size = %d
	for (i in 1:(size-1)) for (j in (i+1):size) print(cor(mat[i,], mat[j,]))
	""" % (tmp_file, sep, size, len(phase_space[0]), size)
	ro.r(r_script)
	'''

	return Graph.Adjacency(adj_mat.tolist(), mode=ADJ_UNDIRECTED)
Example #19
    def process_file(self, filename):
        """Loads a graph from the given file, runs the clustering
        algorithm on it and prints the clusters to the standard
        output."""
        self.log.info("Processing %s..." % filename)

        graph = load(filename, format=self.options.format)

        #delete edges with weight less than weight_threshold provided as input options
        if "weight" in graph.edge_attributes(): 
            graph.es['width']=5
            if "weight" in graph.es.attributes():
                max_w = math.log(1+max(graph.es['weight']),2)
                min_w = math.log(1+min(graph.es['weight']),2)
                graph.es['width']=[int(10*(math.log(1+w,2)-min_w)/(max_w-min_w))+1 for w in graph.es['weight']]
            graph.es(weight_lt=self.options.weight_threshold).delete()
            graph.vs(_degree=0).delete() #delete isolated vertices as a result of above edge removal
        
        # First let's plot the histogram of edge weights
        n, b, patches = plt1.hist(graph.es['weight'], density=True, log=True)#, bins=range(0,10000,10), facecolor='g', alpha=0.75)
        plt1.xlabel('Edge Weights')
        plt1.ylabel('Probability')
        plt1.grid(True)
        #plt1.show()
        
        # Obtain k-NN for all vertices using 1/linkweight as distance metric
        # This will be a square matrix of |Vertex|*|Vertex| where [i][j] denotes 
        # whether vertex j is among the K-NN of vertex i
        # this is obtained by sorting the rows of the original proximity matrix and taking the first k elements
        adj = graph.get_adjacency(type=2, attribute="weight", default=0, eids=False)
        Vsize = adj.shape
        knn = np.eye(Vsize[0]) #consider a node to be its nearest neighbor
        KNN_count = self.options.knn
        if KNN_count == 0: KNN_count = Vsize[0] #consider all neighbors
        for i in range(Vsize[0]):
            tmp = sorted(range(len(adj[i])), key=adj[i].__getitem__,reverse=True)
            indexes = [j for j in tmp if adj[i][j] > 0]
            nn = indexes[:KNN_count]
            knn[i][nn] = 1;
        # now knn is an np.array for which knn[i][j]==1 iff j is among the k-nearest neighbors of i    
        
        # Next we compute the shared nearest neighbor similarity matrix
        # snn_tmp[i][j] will hold the number of common k-nearest neighbors of i and j
        snn = np.zeros(Vsize)
        snn_tmp = np.zeros(Vsize)
        for i in range(Vsize[0]):
            snn[i] = np.array([1/(0.000001+np.dot(knn[i],knn[j])) for j in range(Vsize[0])])
            snn[i][i] = 0
            tmp = [np.dot(knn[i],knn[j]) for j in range(Vsize[0]) if np.dot(knn[i],knn[j]) > 0]
            #print(i,tmp) 
            snn_tmp[i] = np.array([np.dot(knn[i],knn[j]) for j in range(Vsize[0])])
            snn_tmp[i][i] = 0#1000 
            #print("***",snn_tmp[i])
        
        #print(list(snn_tmp))
        #print(len(snn_tmp))

        # Apply similarity threshold and eliminate some edges of the SNN graph
        # Form the SNN graph
        g = Graph.Adjacency((snn_tmp >= self.options.snn_threshold ).tolist(), mode=1) #create undirected (mode=1) graph treating edge weights as True/False only
        g.es["weight"] = snn_tmp[(snn_tmp >= self.options.snn_threshold)] #add weights
        g.vs["name"] = graph.vs["name"] #use same vertex names
        g.vs["label"]= graph.vs["name"]
        g.es["width"] = g.es["weight"]
        
        
        # plot the SNN graph
        layout = graph.layout('kk')
        width,height = 2*1300,2*700
        surface = cairo.ImageSurface(cairo.FORMAT_ARGB32, width, height)
        ctx = cairo.Context(surface)
        ctx.scale(width,height)
        ctx.rectangle(0, 0, 1, 1)
        ctx.set_source_rgba(0,0,0,0)
        ctx.fill()
        plt = plot(g, vertex_shape = 'circle', bbox = (width, height), layout = layout)
        

        # next let's do the k-dist plot: k-th nearest neighbor distance vs. # of points having that distance to their k-th nn
        # compute distance of all points to their MinPts-th nearest neighbor
        X=np.sort(snn,axis=1)
        dist = [X[i][self.options.MinPts] for i in range(Vsize[0])]

        Y=np.sort(snn_tmp,axis=1)
        siml = [Y[i][-self.options.MinPts] for i in range(Vsize[0])]
        #ssiml = siml #remove
        # sort points with respect to this distance
        argsim = np.argsort(siml)
        dist = sorted(dist,reverse=False)
        siml = sorted(siml,reverse=True)       
        print('.....................# of Shared Nearest Neighbors with '+str(self.options.MinPts)+'-th NN')
        for i in range(len(siml)):
            print("similarity = ",siml[i]," : ",graph.vs[argsim[-i-1]]["name"])
            #print(i,siml[i],ssiml[argsim[-i-1]]) #remove

        print("shared NN=", siml)
        points = []
        print("unique dist=",np.unique(dist))
        for i in list(np.unique(dist)):
            print(i)
            points.append(max([j for j,k in enumerate(list(dist)) if k==i]))
        print("points=",points)

        # Now let's plot the histogram of similarity with the MinPts-th nearest neighbor
        n, b, patches = plt1.hist(siml, density=False, log=False, bins=range(int(max(list(siml)))), facecolor='g', alpha=0.75)
        plt1.xlabel('# of Shared Nearest Neighbors with '+str(self.options.MinPts)+'-th NN')
        plt1.ylabel('Frequency')
        plt1.grid(True)
        #plt1.show()
        

        # plot this distance vs. sorted point index
        #import matplotlib.pyplot as plt1
        plt1.ylabel(str(self.options.MinPts)+'-th NN distance')
        plt1.xlabel('# of points')
        plt1.plot(points[:-2],list(np.unique(dist))[:-2])
        #plt1.show()

        
        
        # If the graph has weights and we want to ignore them, delete them
        if self.options.no_weights and "weight" in graph.edge_attributes():
            del graph.es["weight"]

        # If the graph is directed, we have to make it undirected
        if graph.is_directed():
            graph.to_undirected(combine_edges="sum")
            self.log.warning("Converted directed graph to undirected.")

        # Set up the "name" attribute properly
        if "label" in graph.vertex_attributes():
            graph.vs["name"] = graph.vs["label"]
            del graph.vs["label"]
        elif "name" not in graph.vertex_attributes():
            graph.vs["name"] = [str(i) for i in xrange(graph.vcount())]
Example #20
def plot_graph(graph: int):
    lastname = get_friends(graph, 'last_name')
    vertices = [lastname['items'][i]['first_name']
                + ' ' + lastname['items'][i]['last_name']
                for i in range(lastname['count'])]
    edges = get_network(get_friends_id(graph), False)
    if isinstance(edges, ndarray):
        g = Graph.Adjacency(edges.tolist(), mode='undirected')
        g.vs['label'] = vertices
        g.vs['shape'] = 'triangle'
        g.vs['size'] = 10
    else:
        # Create the graph
        g = Graph(vertex_attrs={"shape": "triangle",
                                "label": vertices,
                                "size": 10},
                  edges=edges, directed=False)

    # Set the graph display style
    n = len(vertices)
    visual_style = {"vertex_size": 20,
                    "bbox": (2000, 2000),  # размер поля
                    "margin": 100,  # расстояние от края до вершин
                    "vertex_label_dist": 1.6,  # расстояние между вершинами
                    "edge_color": "gray",
                    "autocurve": True,  # кривизна ребер
                    "layout": g.layout_fruchterman_reingold(
                        # Fruchterman-Reingold force-directed algorithm
                        # алгоритм компоновки
                        maxiter=1000,
                        # the maximum distance to move a vertex in an
                        # iteration. The default is the number of vertices
                        area=n ** 3,
                        # he area of the square on which the vertices will
                        #  be placed. The default is the square of the number
                        # of vertices.
                        repulserad=n ** 3
                        # determines the radius at which vertex-vertex
                        # repulsion cancels out attraction of adjacent
                        # vertices. The default is the number of vertices^3.
                    )}
    g.simplify(multiple=True, loops=True)
    # Draw the graph
    clusters = g.community_multilevel()
    # Finds the community structure of the graph according
    # to the multilevel algorithm of Blondel et al.
    '''
    This is a bottom-up algorithm: initially every vertex belongs to a
    separate community, and vertices are moved between communities
    iteratively in a way that maximizes the vertices' local contribution
    to the overall modularity score. When a consensus is reached (i.e. no
    single move would increase the modularity score), every community of
    the original graph is shrunk to a single vertex (while keeping the
    total weight of the incident edges) and the process continues on the
    next level. The algorithm stops when it is not possible to increase
    the modularity any more after shrinking the communities to vertices.
    '''
    pal = igraph.drawing.colors.ClusterColoringPalette(len(clusters))
    # A palette suitable for coloring vertices when plotting a clustering.
    g.vs['color'] = pal.get_many(clusters.membership)
    plot(g, **visual_style)