Example #1
def build_graph(connList, vertexNames, outfile=None):
	"""Turn connectivity list and vertex names into iGraph object

	The connectivity list must be a pickle containing an array with three cols,
	each containing the source, target and edge weight respectively. The edge
	weight is supposed to be the number of relations between source and target.
	This weight is referred to as the rel_weight (as opposed to the deg_weight)

	The vertexNames is also a pickle with a dictionary where every uri is a key
	whose value is the corresponding vertex id (just an integer). Note that the
	vertex id's are NOT preserved during conversion: iGraph creates new ids on
	the fly (numbering vertices in the order they appear in the edgelist).

	Args:
		- connList: filename of the pickle file containing a matrix, each row 
		containing source, target, weight
		- vertexNames: filename of a pickle containing a dictionary with uri's
		 as keys and vertex ids as values
		- outfile: filename where a GraphML version of the graph will be stored
		If outfile=None, then the graph object is only returned

	Returns:
		- the iGraph graph object
		- stores a GraphML file if outfile is set.
	"""
	
	print('(1/6) Starting conversion, could take a while...')

	connList = pickle.load(open(connList,'rb'))
	connArr = np.array(connList, dtype='int')
	del connList
	vertexNames = pickle.load(open(vertexNames,'rb'))
	print('(2/6) Loading of pickle files completed.')

	edges = [{'source':s,'target':t,'rel_weight':w} for s,t,w in connArr]
	vertices = [{'orig_id': int(id), 'uri': uri} for uri, id in vertexNames.items()]
	print('(3/6) Cleaning edges and vertices completed')

	graph = ig.Graph.DictList(vertices,edges, vertex_name_attr='orig_id')
	del graph.es['source']
	del graph.es['target']	
	print('(4/6) iGraph object created')

	degrees=graph.degree()
	for e in graph.es:
		e['deg_weight'] = np.log(degrees[e.source]) + np.log(degrees[e.target])
	print('(5/6) Finished calculating degree based weights')

	print('Summary of the graph:')
	ig.summary(graph)

	if outfile:
		graph.write_graphml(outfile)
		print('(6/6) GraphML file "'+outfile+'" saved; finished!')
	else:
		print('(6/6) Finished!')

	return graph
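
# A minimal usage sketch (hypothetical file names; both pickle files must
# follow the layout described in the docstring above):
if __name__ == '__main__':
	g = build_graph('connections.pickle', 'vertex_names.pickle', outfile='graph.graphml')
	print(g.vcount(), g.ecount())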
Example #2
def vizsgalat(net):
    """Checks whether the network is a tree; returns its Pruefer code if so."""
    N = net.vcount()
    igraph.summary(net)
    cc = net.components()
    comp = len(cc.sizes())
    elek = net.ecount()
    if comp == 1 and elek == N - 1:
        kod = pruf(net)
        print("This is a tree!")
    else:
        kod = None
    print(kod)
    print("")
    print("")
    return kod
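
# Usage sketch: Graph.Tree always produces a connected graph with N-1 edges,
# so vizsgalat() should report a tree here (pruf() is the caller's own
# Pruefer-code helper and is not shown in this snippet).
if __name__ == '__main__':
    vizsgalat(igraph.Graph.Tree(7, 2))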
    def testLoadSaveGraph(self, fileNameIn, fileNameOut):
        print("testLoadSaveGraph started ...")
        print("Igraph version %s" % (igraph.__version__))
        self.graph = igraph.Graph.Read_GraphML(fileNameIn)
        print("node parameters(%s): %s" % ('id', self.graph.vs['id']))
        print("node parameters(%s): %s" % ('label', self.graph.vs['label']))
        print("edge parameters(%s): %s" % ('Edge Id', self.graph.es['Edge Id']))
        print("edge parameters(%s): %s" % ('Edge Label', self.graph.es['Edge Label']))
        print("edge parameters(%s): %s" % ('weight', self.graph.es['weight']))
        print("edge parameters(edge %d): %s" % (0, self.graph.es[0]))

        # fixing edge label
        self.graph.es['label'] = self.graph.es['Edge Label']

        igraph.summary(self.graph)
        self.graph.write_graphml(fileNameOut)
        print("testLoadSaveGraph finished ...")
Example #4
def contactsGraph(): 
    fileName = path + "connections-28-11-12"
    vertexIdDict = {} 
    vertexIdSet = set([])
    edgeSet = set([])
    edgeArray = []
    graph = igraph.Graph()
    i = 0 
    j = 0 
    
    with open(fileName) as f:
        f.readline()  
        
        for line in f:
            if i % 50000 == 0: 
                print(i)
            words = line.split()
            vId1 = int(words[0])
            vId2 = int(words[1])
            
            if vId1 not in vertexIdSet:    
                vertexIdDict[vId1] = j 
                vertexIdSet.add(vId1)
                j += 1 
            
            if vId2 not in vertexIdSet:    
                vertexIdDict[vId2] = j 
                vertexIdSet.add(vId2)
                j += 1 
            
            if (vertexIdDict[vId1], vertexIdDict[vId2]) not in edgeSet and (vertexIdDict[vId2], vertexIdDict[vId1]) not in edgeSet: 
                edgeArray.append([vertexIdDict[vId1], vertexIdDict[vId2]])
                edgeSet.add((vertexIdDict[vId1], vertexIdDict[vId2]))
                
            i += 1 

    print("Read " + str(i) + " lines with " + str(j) + " vertices")     
    
    graph.add_vertices(j)
    graph.add_edges(edgeArray)    
    igraph.summary(graph)

    graphStats = GraphStatistics()
    statsArray = graphStats.scalarStatistics(graph, slowStats=False)    
    print(graphStats.strScalarStatsArray(statsArray))
    
    xs, ys = zip(*[(left, count) for left, _, count in graph.degree_distribution().bins()])
    plt.figure(0)
    plt.bar(xs[0:30], ys[0:30])
    plt.xlabel("Degree")

    xs, ys = zip(*[(left, count) for left, _, count in graph.components().size_histogram().bins()])
    plt.figure(1)
    plt.bar(xs[0:30], ys[0:30])
    plt.xlabel("Component size")
    plt.show()
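
# Hedged alternative to the manual edgeSet bookkeeping above: add every
# remapped pair and let igraph collapse duplicates afterwards. `pairs` is a
# hypothetical list of already-remapped (int, int) tuples.
def dedupWithSimplify(pairs, numVertices):
    g = igraph.Graph(numVertices)
    g.add_edges(pairs)
    g.simplify(multiple=True, loops=True)  # drops parallel edges and self-loops
    return g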
Example #5
def fullCoauthorGraph(): 
    fileName = path + "coauthorsGraph"
    graph = igraph.Graph()
    graph = graph.Read_Edgelist(fileName)
    graph = graph.as_undirected()
    igraph.summary(graph)

    graphStats = GraphStatistics()
    statsArray = graphStats.scalarStatistics(graph, slowStats=False)    
    print(graphStats.strScalarStatsArray(statsArray))
def make_projection(graph, atts):
    """ makes bipartite projections, returns seller projection"""

    # PREPARE EDGE ATTRIBUTES
    graph.es['val'] = list(atts['vals'])
    graph.es['hs'] = list(atts['hs'])
    graph.es['dest'] = list(atts['dest'])
    graph.es['hss'] = list(atts['hss'])
    graph.es['dest_source'] = list(atts['dest_source'])
    graph.es['imp_name'] = list(atts['imp_name'])

    # PREPARE VERTEX ATTRIBUTES
    # The strength member function sums all of the edge values
    graph.vs['val'] = graph.strength(graph.vs, weights='val')
    # Get list of exporters who sell to the US
    us_list = what_sellers(graph.es, 'USA')
    graph.vs['US'] = 0
    graph.vs[us_list]['US'] = 1
    # Get list of exporters who sell to a selected foreign country
    ven_list = what_sellers(graph.es, 'VEN')
    graph.vs['VEN'] = 0
    graph.vs[ven_list]['VEN'] = 1
    # Get most frequent hs by exporter
    hs_tup = source_hs(graph.es,'hss')
    graph.vs['hs_source'] = 0
    graph.vs[hs_tup[0]]['hs_source'] = hs_tup[1]
    # Get most frequent destination
    dest_tup = source_hs(graph.es,'dest_source')
    graph.vs['dest_source'] = 0
    graph.vs[dest_tup[0]]['dest_source'] = dest_tup[1]
    
    # SIZES FROM graph.csv
    size = 10046
    edge_size = 58031
    big_size = 40789 
    sub = size

    # MAKE THE TWO TYPES (SELLER AND BUYER)
    graph.vs['type'] = [1] * big_size
    graph.vs[sub:]['type'] = [0] * (big_size - sub)

    # PROJECT AND ADD ATTRIBUTES
    proj2, proj1 = graph.bipartite_projection()
    proj1.vs['val'] = graph.vs[0:sub]['val']
    # Get most valuable importer 
    max_imp = pd.read_pickle('max_imp.pickle')
    proj1.vs['imp_name'] = max_imp

    # WRITE AND READ
    proj1.write_pickle('proj1.pickle')
    proj1 = ig.read('proj1.pickle')
    ig.summary(proj1)

    return proj1, proj2
    def testCreateSaveGraph(self, fileNameOut):
        print("testCreateSaveGraph started ...")
        print("Igraph version %s" % (igraph.__version__))
        self.graph = igraph.Graph()
        self.graph.add_vertices(3)
        self.graph.add_edges([(0, 1), (1, 2)])
        self.graph.vs['id'] = [5, 7, 9]
        self.graph.vs['size'] = [50, 30, 40]
        self.graph.vs['r'] = [255, 255, 0]
        self.graph.vs['g'] = [0, 0, 0]
        self.graph.vs['b'] = [0, 0, 255]
        self.graph.vs['x'] = [0, 100, 100]
        self.graph.vs['y'] = [0, 0, 100]
        self.graph.vs['label'] = ["Nada", "Zhenia", "Sasha"]

        self.graph.es['Edge Id'] = [57, 79]
        self.graph.es['Edge Label'] = ['Nada-Zhenia', 'Zhenia-Sasha']
        self.graph.es['label'] = ['Nada-Zhenia-l', 'Zhenia-Sasha-l']
        self.graph.es['weight'] = [1, 5]

        igraph.summary(self.graph)
        self.graph.write_graphml(fileNameOut)
        print("testCreateSaveGraph finished ...")
Example #8
def articleGroupsGraph(): 
    fileName = path + "articleGroupMembership-28-11-12"
    graph = readBipartiteGraph(fileName) 
    igraph.summary(graph)

    graphStats = GraphStatistics()
    statsArray = graphStats.scalarStatistics(graph, slowStats=False)    
    print(graphStats.strScalarStatsArray(statsArray))
    
    xs, ys = zip(*[(left, count) for left, _, count in graph.degree_distribution().bins()])
    plt.figure(0)
    plt.bar(xs[0:30], ys[0:30])
    plt.xlabel("Degree")

    xs, ys = zip(*[(left, count) for left, _, count in graph.components().size_histogram().bins()])
    plt.figure(1)
    plt.bar(xs[0:30], ys[0:30])
    plt.xlabel("Component size")
    plt.show() 
Example #9
#!/usr/bin/env python

import igraph as ig

g = ig.Graph.Read_Ncol("data/200_edges_no_dups.ncol", directed=False)
ig.summary(g)

layout = g.layout("kk")

style = dict()
style["vertex_size"] = 10
style["edge_width"] = [1 + w/1000 for w in g.es["weight"]]
style["layout"] = layout
ig.plot(g, **style)
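
# The same style dict also works for writing the plot to a file: igraph.plot
# accepts a target filename as its second positional argument (the output
# name below is hypothetical; file plotting needs the cairo backend).
ig.plot(g, "200_edges_no_dups.png", **style)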
Example #10
import igraph
import numpy as np

matrix = [[0,1,2],[1,0,0],[2,0,0]]
g = igraph.Graph.Adjacency(matrix)
print(g.get_edgelist())

igraph.summary(g)

print(g.get_edgelist()[0])
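
# By default Graph.Adjacency reads the matrix as directed, and the entry 2
# above becomes two parallel edges. A symmetric matrix like this one can
# also be read as undirected:
g_und = igraph.Graph.Adjacency(matrix, mode=igraph.ADJ_UNDIRECTED)
igraph.summary(g_und)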
from igraph import Graph, summary
from igraph import plot as iplot
# imported as iplot so it does not clash with pylab.plot

# In a standalone program you would also need this line in place of the
# -pylab option:
# from pylab import plot, average, array, grid, xlabel, ylabel, legend, show
# or simply
# from pylab import *
# and a show() call after every plot function. With
# import pylab
# the pylab functions are instead called as pylab.function().

nw = Graph.Erdos_Renyi(1000, .001)

summary(nw)

M = nw.ecount()
N = nw.vcount()
Mmax = N*(N-1)/2
Mmax
M/Mmax
p = 1.*M/Mmax

nw.diameter()

nw.components()
cc = nw.components()  # (connected) components
ccs = cc.sizes()
max(ccs)
average(ccs)
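
# Worked check of the numbers above: with N = 1000 vertices there are
# Mmax = 1000*999/2 = 499500 possible edges, so the expected edge count is
# p*Mmax = 0.001 * 499500 ~ 500, and the measured M/Mmax should land close
# to the 0.001 passed to Graph.Erdos_Renyi.
print(N, Mmax, M, p)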
Example #13
############################################################### Q 11
#from Q6 get GCC vertex
import os
os.chdir('/Users/Ray/Desktop/ECE232_HW5/2/dataset/preprocess')

import igraph as ig
g = ig.Graph.Read(f='preprocessed.txt', format='ncol', directed=False)
gcc = g.components().giant()
ig.summary(gcc)

vertexID_list = []
for i in range(len(gcc.vs)):
    vertexID_list.append(int(gcc.vs[i]['name']))

import json
import pandas as pd
import numpy as np
os.chdir('/Users/Ray/Desktop/ECE232_HW5')
with open('san_francisco_censustracts.json') as f:
    geoBound = json.load(f)

cood_mean_arr = np.zeros((len(geoBound['features']), 2))
# use mean coordinate to represent the node
for i in range(len(geoBound['features'])):
    cood_mean_arr[i, :] = np.mean(
        geoBound['features'][i]['geometry']['coordinates'][0][0], axis=0)

import matplotlib.pyplot as plt
from scipy.spatial import Delaunay
cood_mean_arr_gcc = cood_mean_arr[[x - 1 for x in vertexID_list
                                   ], :]  #fit to cood_mean
Example #14
#igraph.summary(c.Graph)
#c.make_Graph(15, 200, prob = 0.1)
#igraph.summary(c.Graph)
#print len(c.clique_list)
#g = c.Graph.copy()
#d = DataPolishing(g)
#igraph.summary(d.Graph)
#print len(d.Graph.maximal_cliques(min = 3))
#igraph.write(d.Graph, "randam_clique_5000.gml")
#d.data_polish(polish_ratio = pr)
#igraph.summary(d.Graph)
#print len(d.Graph.maximal_cliques(min = 3))
#igraph.write(d.Graph, "polished_clique_5000.gml")
#print "recall = " , c.recall(d.Graph)
#print "precision = " , c.precision(d.Graph)
#print "accuracy = " , c.accuracy(d.Graph)

g = igraph.read("twitter_graph.gml")
a = DataPolishing(g)
print "original:"
igraph.summary(a.Graph)
print len(a.Graph.maximal_cliques(min = 3))

a.data_polish(polish_ratio = pr)

print "polished"
igraph.summary(a.Graph)
print len(a.Graph.maximal_cliques(min = 3))

igraph.write(a.Graph, "polished_twitter_grapht.gml")
    return dict(rem_deg)


edgefiles = infile

f = open(outfile, 'w')

for fil in gb.glob(edgefiles):

    #Read the edge list; Change parameter weights and directed for network of study
    p = ig.Graph.Read_Ncol(fil, weights=weight, directed=False, names=True)

    #number of nodes
    num_nodes = p.vcount()

    ig.summary(p)

    dg_dis = p.degree()
    dg_dis_count = []
    for i in set(dg_dis):
        dg_dis_count.append((i, dg_dis.count(i)))

    edg = entropy_dg(dict(dg_dis_count))
    print('\nEntropy of degree distribution for the given network is ', edg)
    a = rem_deg_dist(dict(dg_dis_count))
    erdg = entropy_rdg(a)
    print(
        '\nEntropy of remaining degree distribution for the given network is ',
        erdg, '\n')

    f.write(fil + '\t' + str(erdg) + '\n')
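
# Hedged sketch of the entropy helper called above (its real definition is
# not shown in this snippet): Shannon entropy of the normalized degree counts.
import math

def entropy_dg_sketch(deg_counts):
    total = float(sum(deg_counts.values()))
    return -sum((c / total) * math.log(c / total, 2)
                for c in deg_counts.values() if c > 0)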
Example #16
def main():
    
    node_limit, detection, weight_threshold, density, measure, start_date, end_date, theme, plot_network = get_params()
    experiment_parameters = get_params()

    print("Sample limit: {}".format(node_limit))
    print("Community detection: {}".format(detection))
    print("Edge weight threshold: {}".format(weight_threshold))

    # %% Read data
    path = 'resources/votos_31-01-2019_to_30-12-2020.csv'
    df = pd.read_csv(path)

    basename = ntpath.basename(path)
    print(basename)
    random.seed(0)

    if theme is not None:
        df = filter_by_theme(df, theme, start_date, end_date)
    df, reps = filter_by_name_and_quantity(df, node_limit)

    rep_to_ind = {reps[i]: i for i in range(len(reps))}
    motions = df['idVotacao'].unique()
    motion_to_ind = {motions[i]: i for i in range(len(motions))}
    parties = [p for p in df['deputado_siglaPartido'].unique() if pd.notna(p)]
    edges = []

    vote_matrix = np.zeros((len(reps), len(motions)))
    df_grouped = df.groupby(['idVotacao', 'deputado_nome'])
    for group, df_group in df_grouped:
        voto = df_group['voto'].values[0]
        i = rep_to_ind[group[1]]
        j = motion_to_ind[group[0]]
        if voto == "Sim":
            vote_matrix[i,j] = 1
        if voto == "Não":
            vote_matrix[i,j] = -1

    if measure == 'generalized':
        M = generalized_similarity(vote_matrix)
    elif measure == 'pearson':
        M = pearson_correlation(vote_matrix)
    else:
        raise NotImplementedError

    for dep1, dep2 in combinations(range(len(reps)), 2):
        if M[dep1,dep2] > 0:
            edges.append(((dep1,dep2), M[dep1,dep2]))
    
    #plot_similarity_distribution([e[1] for e in edges if e[1] > 0.99], weight_threshold)

    g = Graph(graph_attrs={'name': 'Camera dos Deputados'}, directed=False)
    g.add_vertices(reps)
    edges, weights = filter_edges(edges, num_nodes=g.vcount(), threshold=weight_threshold, density=density)
    g.add_edges(edges)
    g.es['weight'] = weights
    # Normalize weights to [0,1]
    maxw = max(g.es['weight'])
    minw = min(g.es['weight'])
    g.es['weight'] = [(e - minw) / (maxw - minw) for e in g.es['weight']]
    summary(g)

    if detection == 'leiden':
        communities = leidenalg.find_partition(g, leidenalg.ModularityVertexPartition, weights='weight', n_iterations=100).membership
        #communities = g.community_leiden(objective_function='modularity', weights='weight', n_iterations=100)
    elif detection == 'spinglass':
        communities = g.community_spinglass(weights='weight').membership
    elif detection == 'multilevel':
        communities = g.community_multilevel(weights='weight').membership
    elif detection == 'party':
        communities = groups_by_party(df, reps, parties)
    else:
        raise NotImplementedError
        
    modularity = g.modularity(communities, 'weight')
    print("Modularity Score: ", modularity)
    save_modularity(modularity, theme, start_date, end_date)

    g.vs['partido'] = [parties[i] for i in groups_by_party(df, reps, parties)]
    g.vs['url_foto'] = [df[df['deputado_nome'] == dep]['deputado_urlFoto'].values[0] for dep in g.vs['name']]
    g.vs['uf'] = [df[df['deputado_nome'] == dep]['deputado_siglaUf'].values[0] for dep in g.vs['name']]
    g.save('graphs/g.graphml')

    degrees, betweenness, closeness, clustering_coef = collect_metrics(g, experiment_parameters)

    if plot_network:
        period = start_date + '_to_' + end_date
        draw_vis(g, groups=communities, parties=parties, theme=theme, period=period, degrees=degrees, betweenness=betweenness, closeness=closeness, clustering_coef=clustering_coef)
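
# Hedged note on the weight normalization inside main() above: if every edge
# carries the same weight, maxw == minw and the division fails. A safer
# variant (a sketch, not part of the original) falls back to a constant:
def normalize_weights(weights):
    lo, hi = min(weights), max(weights)
    if hi == lo:
        return [1.0 for _ in weights]
    return [(w - lo) / (hi - lo) for w in weights]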
if sys.platform == 'linux2':
	path = '../'
if not os.path.exists(path+"IgraphEdges") : #igraphe require a specific input format
	fin = open(path+'database_'+maxDateStr+'.txt.gz')
	df = pandas.read_csv(fin,sep=",",encoding="utf8",compression = 'gzip')
	df["userID"] *= 2
	df["movieID"] *= 2
	df["movieID"] += 1
	df[["userID","movieID","rating"]].to_csv(path+"IgraphEdges",sep = "\t",encoding = "utf-8",header = False, index = False)


fin = path+"IgraphEdges"
print "Now let's try Igraph"
g = Graph.Read_Ncol(fin, directed=True,weights = True) #read the graph
g.vs["type"] = [int(name)%2 == 1 for name in g.vs["name"]] #assign the movie or user type : 1 = movie
igraph.summary(g)

timestart =  time.time() ##Not enough RAM, as expected
g.get_adjacency()
print "time to compute the adjacency matrix %d sec" % int(time.time() - timestart)

timestart =  time.time()
a = g.degree_distribution(mode = "in")
print a
print "time to compute inbound (~movies) degree distribution (for the bipartite graph) %d sec" % int(time.time() - timestart)

timestart =  time.time()
a = g.degree_distribution(mode = "out")
print a
print "time to compute outbound (~user) degree distribution (for the bipartite graph) %d sec" % int(time.time() - timestart)
Example #19
#igraph.write(g, "randam_test.gml")
#a = DataPolishing(g)
#igraph.summary(a.Graph)
#print a.Graph.maximal_cliques(min = 3)
#print len(a.Graph.maximal_cliques(min = 3))

#a.data_polish(polish_ratio = pr)

#print a.Graph.maximal_cliques(min = 3)
#print len(a.Graph.maximal_cliques(min = 3))
#igraph.write(a.Graph, "polished_grapht.gml")
#igraph.summary(a.Graph)

c = Experiment(5000)

igraph.summary(c.Graph)
c.make_Graph(30, 100)
igraph.summary(c.Graph)
#print len(c.clique_list)
g = c.Graph.copy()
d = DataPolishing(g)
igraph.summary(d.Graph)
print(len(d.Graph.maximal_cliques(min=3)))
#igraph.write(d.Graph, "randam_clique_5000.gml")
d.data_polish(polish_ratio=pr)
igraph.summary(d.Graph)
print(len(d.Graph.maximal_cliques(min=3)))
#igraph.write(d.Graph, "polished_clique_5000.gml")
print "recall = ", c.recall(d.Graph)
print "precision = ", c.precision(d.Graph)
print "accuracy = ", c.accuracy(d.Graph)
        cnt = 0

        for line in f:
            ls = line[:-2].split('\t')
            #print ls
            follower = int(ls[0])
            for i in range(1, len(ls) // 2):
                followee = int(ls[2 * i])
                edges.append([followee, follower])

        f.close()
    g.add_edges(edges)

    igraph.summary(g)
    print('loading graph takes %f' % (time() - t_0))

    t_0 = time()
    evcent = g.evcent()
    print('evcent computation takes %f' % (time() - t_0))

    #print type(evcent),len(evcent)
    t_0 = time()
    k_shell = g.shell_index()
    print('kshell computation takes %f' % (time() - t_0))

    t_0 = time()
    outdeg = g.outdegree()
    print('outdeg computation takes %f' % (time() - t_0))
    #print type(k_shell),len(k_shell)
Example #21
        loss = train_batch(model, X_couples, y_labels)
        losses += loss
        if epoch % print_every == 0:
            logging.info(
                "Mean loss in Epoch [%s] with %s valid i8sequences = %s" %
                (epoch, valid_sequences, losses / valid_sequences))
            losses, valid_sequences = 0.0, 0


if __name__ == "__main__":
    # g = Graph.Read_Edgelist("deepwalk/p2p-Gnutella08.edgelist")
    g = load_adjlist("deepwalk/karate.adjlist", directed=False)
    vocab_size = len(g.vs)
    max_len = 5
    save = True
    sampling_table = make_sampling_table(vocab_size)
    degrees = np.array(g.vs.degree())
    inv_sqrt_degree = 1 / np.sqrt(degrees)
    sampling_table = inv_sqrt_degree / np.sum(inv_sqrt_degree)
    logging.info("Graph Summary: \n", summary(g))
    logging.info("Building Model")
    if save:
        model = pickle.load(open("out/Karate.Model.3100.pkl", "rb"))
    else:
        model = Sequential()
        model.add(WordContextProduct(vocab_size, proj_dim=300, init='uniform'))
        model.compile(loss='binary_crossentropy', optimizer='rmsprop')
        #couples, labels = skipgrams(sequences[np.random.randint(vocab_size)], vocab_size, window_size=4, negative_samples=1.0, sampling_table=sampling_table)
        #train_on_model(model, g, vocab_size, print_every=1)
        #pickle.dump(model, open("out/Karate.Model.3100.pkl", "wb"))
Example #22
    def confimation(self, graph_ig, graph_nx):
        igraph.summary(graph_ig)
        print(graph_nx.number_of_edges())
        print(graph_nx.number_of_nodes())
Example #23
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import igraph as G
import numpy as np

# Build the graph
p2 = G.Graph.Read_GML("../archivos/redchica.gml")
G.summary(p2)
# Plot the graph

betweenness = p2.betweenness()
pageranks   = [round(i, 3) for i in p2.pagerank()]
indegree    = p2.degree(mode="in")
names = p2.vs["label"]
p2.vs["label"] = ["B: " + str(betweenness[i]) + "\nPR: " + str(pageranks[i]) + "\nInD: " + str(indegree[i]) + "\n" + names[i] for i in range(8)]
p2.es["width"] = 1
p2.vs["color"] = "lightblue"
p2.vs["size"] = 80
print(sorted([(i, j) for i, j in enumerate(indegree)], key=lambda x: x[1], reverse=True))
print(sorted([(i, j) for i, j in enumerate(betweenness)], key=lambda x: x[1], reverse=True))
print(sorted([(i, j) for i, j in enumerate(pageranks)], key=lambda x: x[1], reverse=True))
G.plot(p2, "../img/p4-all.png", margin=50)

# plot indegree
p2.vs["label"] = ["InD: " + str(indegree[i]) + "\n" + names[i] for i in range(8)]
p2.vs['size'] = [500.0*i/sum(indegree) for i in indegree]
p2.vs["color"] = "green"
G.plot(p2, "../img/p4-indegree.png", margin=50)

# plot betweenness
Example #24
import networkx as nx
import community
import igraph
import cairocffi


#G=nx.read_gml("erdoscom.gml")
#print(nx.info(G))

iG = igraph.read("erdos.gml")
igraph.summary(iG)
print(iG)
#vertDendo=iG.community_fastgreedy()
#print(vertDendo.membership)
igraph.plot(iG)
"""print g.vs.select(long_lt=4)["name"] 
print g.vs.select(_degree = g.maxdegree())["name"]
_lt = less than
_eq = equals"""

"""g.vs["name"] = ["Alice", "Bob", "Claire", "Dennis", "Esther", "Frank", "George"]
g.vs["age"] = [25, 31, 18, 47, 22, 23, 50]
g.vs["gender"] = ["f", "m", "f", "m", "f", "m", "m"]
g.es["is_formal"] = [False, False, True, True, True, False, True, False, False]
print g.es[0]
g.es[0]["is_formal"] = True
print g.es[0]
g.es[0]["name"] = "Sharon"
print g.es[0]"""

"""g1 = ig.Graph.Tree(127, 2)
g2 = ig.Graph.Tree(127, 2)
print g2.get_edgelist() == g1.get_edgelist()
ig.summary(g1)"""

"""tree = ig.Graph.Tree(127, 2)
g=nx.Graph()##
n0=0
n=24
nodeSize=500
fontSize=12
airportCodes = [1,2,3,4,5,6,7]
g.add_nodes_from(airportCodes)
g.add_edges_from([(1,2), (1,3), (3,2), (3,4), (4,5), (4,6), (6,5), (5,7), (6,7)])
print tree.get_edgelist()
layout=G.layout("kk")
Example #26
def calc_hac_communities(h5_data,
                         adjacency_matrix,
                         linkage_method="average",
                         metric="correlation",
                         plot_flag=True,
                         threshold=None):

    distance_matrix = 1 - adjacency_matrix
    # Create condensed distance matrix
    # A condensed distance matrix is a flat array containing the upper triangular of the distance matrix. (SciPy)
    distance_array = distance_matrix[np.triu_indices_from(distance_matrix,
                                                          k=1)]
    # Alternative to the upper version
    #np.fill_diagonal(distance_matrix, 0.0)
    #distance_matrix = np.around(distance_matrix, 7) #Attention! Round affects clustering
    #distance_array = squareform(distance_matrix)

    # Linkage can be single, complete, average, weighted
    # Calculate linkage matrix
    z = hac.linkage(distance_array, linkage_method, metric)

    # Creation of the actual graph
    hac_community_G = base_graph_structure(h5_data, adjacency_matrix)

    # Calculate dendrogram-cut based on modularity optimization
    threshold_list = []
    for x in range(1, len(adjacency_matrix) + 1):
        memberships = hac.fcluster(z, x, criterion="maxclust")
        threshold_list.append(modularity_trsh(memberships, hac_community_G))

    if plot_flag:
        plt.figure()
        plt.xticks(range(0, len(adjacency_matrix)),
                   range(1,
                         len(adjacency_matrix) + 1))
        plt.title("modularity")
        plt.plot(threshold_list)
        plt.figure()
        hac.dendrogram(z)
        plt.show()

    if threshold is None:
        print("")
        print("Threshold by Modularity used!")
        # +1 because modularity calculation starts with 1 cluster instead of 0, but indexing starts with 0
        threshold = threshold_list.index(max(threshold_list)) + 1
    else:
        print("")
        print("Threshold set manually!")

    # Calculate Hierarchical Clustering
    #membership_list = hac.fclusterdata(data_matrix, threshold, criterion="maxclust", metric=metric, method=linkage_method)
    membership_list = hac.fcluster(z, threshold, criterion="maxclust")

    # Reduce each membership value by one
    # fcluster starts with membership number one, for transformation into ig.VertexClustering a starting membership of zero is needed
    membership_list = [m - 1 for m in membership_list]

    hac_communities = ig.VertexClustering(hac_community_G,
                                          membership=membership_list)

    print("")
    print("Community Graph:")
    ig.summary(hac_community_G)

    print("")
    print("Threshold of Dendrogramm Cut: " + str(threshold))

    # Add community membership as attribute
    for vertex in hac_community_G.vs:
        vertex["membership"] = hac_communities.membership[vertex.index]

    print("")
    print("Number of Communities: " + str(len(list(hac_communities))))

    # Calculate unweighted modularity
    modularity = hac_communities.modularity
    # Calculate weighted modularity
    # modularity = hac_community_G.modularity(hac_communities, weights=hac_community_G.es["weight"])
    print("")
    print("Modularity: " + str(modularity))

    return hac_community_G, hac_communities
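
# Quick cross-check of the condensed-distance construction used above:
# scipy.spatial.distance.squareform flattens the same upper triangle, so the
# two constructions agree on a small example.
from scipy.spatial.distance import squareform
_D = np.array([[0.0, 0.2, 0.4], [0.2, 0.0, 0.6], [0.4, 0.6, 0.0]])
assert np.allclose(squareform(_D), _D[np.triu_indices_from(_D, k=1)])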
Example #27
# assign node names and weights to be attributes of the vertices and edges
# respectively
G.vs['label'] = vocab
G.es['weight'] = weights

# I will also assign the weights to the 'width' attribute of the edges. this
# means that igraph.plot will set the line thicknesses according to the edge
# weights
#G.es['width'] = weights

# plot the graph, just for fun (oops need to install Cairo for this)
#igraph.plot(G, layout="rt", labels=True, margin=80)

# run the greedy community detection algorithm

ig.summary(G)
print(G.get_edgelist()[1:20])
print(G.vs['label'][1:20])

# quick look at the degree histogram
NUMBINS = 20
if verboseplot:
    plt.figure()
    plt.hist(G.degree(), NUMBINS)
    plt.title('degree distribution for the word co-occurrences graph')
    plt.show()

print "finding high modularity communities..."
G_simple = G.simplify()  # removes self loops and duplicate edges
word_dendrogram = G.community_fastgreedy()
print "word dendrogram " + str(word_dendrogram.merges)
Example #28
def generate_node_edge_lists(msi_frame_1, msi_frame_2, graph_1, graph_2):
    edges_g1 = [(msi_frame_1.columns[e.tuple[0]],
                 msi_frame_1.columns[e.tuple[1]]) for e in graph_1.es]
    edges_g2 = [(msi_frame_2.columns[e.tuple[0]],
                 msi_frame_2.columns[e.tuple[1]]) for e in graph_2.es]
    nodes_g1 = [msi_frame_1.columns[v.index] for v in graph_1.vs]
    nodes_g2 = [msi_frame_2.columns[v.index] for v in graph_2.vs]
    nodes_g1_only = set(nodes_g1) - set(nodes_g2)
    nodes_g2_only = set(nodes_g2) - set(nodes_g1)
    nodes_common = set(nodes_g1) & set(nodes_g2)
    edges_g1_only = set(edges_g1) - set(edges_g2)
    edges_g2_only = set(edges_g2) - set(edges_g1)
    edges_common = set(edges_g1) & set(edges_g2)
    print(edges_g1)
    print(edges_g2)
    print(nodes_g1)
    print(nodes_g2)
    print("len symdiff edges")
    print(len(set(edges_g1) ^ set(edges_g2)))
    print("len symdiff nodes")
    print(len(set(nodes_g1) ^ set(nodes_g2)))
    print("len union edges")
    print(len(set(edges_g1) & set(edges_g2)))
    print("len union nodes")
    print(len(set(nodes_g1) & set(nodes_g2)))
    print("g1 only nodes")
    print(len(nodes_g1_only))
    print("g2 only nodes")
    print(len(nodes_g2_only))
    print("g1 only edges")
    print(len(edges_g1_only))
    print("g2 only edges")
    print(len(edges_g2_only))
    ig.summary(graph_1)
    ig.summary(graph_2)
    for x in nodes_g1_only:
        if x in nodes_g2:
            print("error")
        if x not in nodes_g1:
            print("error")
    for x in nodes_g2_only:
        if x in nodes_g1:
            print("error")
        if x not in nodes_g2:
            print("error")
    for x in nodes_common:
        if x not in nodes_g1:
            print("error")
        if x not in nodes_g2:
            print("error")
    for x in edges_g1_only:
        if x in edges_g2:
            print("error")
        if x not in edges_g1:
            print("error")
    for x in edges_g2_only:
        if x in edges_g1:
            print("error")
        if x not in edges_g2:
            print("error")
    for x in edges_common:
        if x not in edges_g1:
            print("error")
        if x not in edges_g2:
            print("error")
    return nodes_g1, edges_g1, nodes_g1_only, edges_g1_only, \
    nodes_g2, edges_g2, nodes_g2_only, edges_g2_only, \
    nodes_common, edges_common
def gen_graph_from_nol(fncol):
    g = Graph.Read_Ncol(fncol, names=True, weights=True, directed=True)
    summary(g)
    return g
Example #31
        'edge_osmid': e_p['link_id'],
        'edge_index': edge_index,
        'start_node': e_p['start_node'],
        'end_node': e_p['end_node'],
        'sec_speed': e_p['sec_speed'],
        'sec_length': e_length,
        'sec_duration': e_length / e_p['sec_speed']
    }
    nodes_in_edge_set.add(e_p['start_node'])
    nodes_in_edge_set.add(e_p['end_node'])
    edge_data.append(edge_element)
    edge_index += 1

### Check if all nodes in the edge dataset are contained in the provided nodes dataset
print(nodes_in_edge_set.issubset(set([*node_json])))

g = igraph.Graph.DictList(vertices=node_data,
                          edges=edge_data,
                          vertex_name_attr='node_osmid',
                          edge_foreign_keys=('start_node', 'end_node'),
                          directed=True)
igraph.summary(g)
# print(g.vs[0])
# print(g.es.find(edge_osmid='101554764'))
# route_a = g.get_shortest_paths(
#     g.vs.find(node_osmid='1172644728'),
#     g.vs.find(node_osmid='1172712808'),output="epath")
# print(route_a)
g.write_graphmlz('{}_{}_0509.graphmlz'.format(FOLDER, FILE))
# g = igraph.load('Collected_data_False14.graphmlz')
# ============================ Q6 =============================

import igraph as ig
import json

g = ig.Graph.Read(f='dataset/edge_weight', format='ncol', directed=False)
gcc = g.components().giant()
ig.summary(g)

name_to_disname_loc = {}

with open('dataset/san_francisco_censustracts.json', 'r') as f:
    data = json.loads(f.readline())
    features = data['features']
    for feature in features:
        coordinates = feature['geometry']['coordinates'][0][0]
        latitude = 0
        longitude = 0
        for coordinate in coordinates:
            latitude += coordinate[0]
            longitude += coordinate[1]
        latitude /= len(coordinates)
        longitude /= len(coordinates)
        name_to_disname_loc[feature['properties']['MOVEMENT_ID']] = (
            feature['properties']['DISPLAY_NAME'], latitude, longitude)

seq = gcc.vs()
for i in seq:
    i['display name'] = name_to_disname_loc[i['name']][0]
    i['location'] = (name_to_disname_loc[i['name']][1],
                     name_to_disname_loc[i['name']][2])
# Initiate the graph
graph = ig.Graph(1)

# Add nodes aka vertices
graph.add_vertices(2)

# Add edges
graph.add_edges([(0, 1), (1, 2)])

# Delete edges
# graph.delete_edges()

# Delete vertices
# graph.delete_vertices()

# To get the id of an edge between two nodes
# graph.get_eid(nodeID_one, nodeID_two)

graph.add_edges([(2, 0)])
graph.add_vertices(3)
graph.add_edges([(2, 3), (3, 4), (4, 5), (5, 3)])

# Draw graphs
#  Deterministic generators produce the same graph if you call them with exactly the same parameters, while stochastic
# generators produce a different graph every time.

#print(graph)

# Note: summary() prints to stdout and returns None, so wrapping it in
# print() would also emit a stray "None".
ig.summary(graph)
import easygui
from matplotlib import pyplot as plt
import numpy as np

print(ig.__version__)

#
# TASK 1: Read Data Files
#

filename = "./USAir97.net"
print(filename)

g = ig.Graph.Read_Pajek(filename)

ig.summary(g)
g.is_weighted()

# Get the attributes and the number of nodes
print(g.vs.attribute_names())
print(g.vcount())

# Get the attributes and the number of edges
print(g.es.attribute_names())
print(g.ecount())

#
# TASK 2: Calculating Centrality Measures
#

degree = g.degree()
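
# Hedged continuation sketch: the snippet stops after degree, but the
# matching igraph calls for the other standard centrality measures are:
betweenness = g.betweenness()
closeness = g.closeness()
print(max(degree), max(betweenness), max(closeness))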
Example #36
import igraph, pylab

g = igraph.load('social_comp.graphml')
#Graph Summary
print "Graph Summary :", igraph.summary(g)
print "Directed : ", g.is_directed()  #Directed graph
print "Weighted : ", g.is_weighted()  #Not weighted graph
print "Diameter: ", g.diameter()  #7
print "Density: ", g.density()  #0.000788246509705

#Determine the strongly connected components by clustering using strong connection
g_clusters = g.clusters(mode='strong')
#Check the cluster sizes
cluster_sizes = g_clusters.sizes()
max_cluster = max(cluster_sizes)

#Clusters of size 1
clusters_of_size_1 = cluster_sizes.count(1)
print('No. of strongly connected components in the graph: ', len(g_clusters))  # 5736
print('Length of largest strongly connected component in the graph: ', max_cluster)  # 154
# print 'No. of strongly connected components with just single node in the graph: ',clusters_of_size_1
# print type(g.betweenness(directed=True))

#Calculate the betweenness centrality values for nodes
betweeness_centrality_values = g.betweenness(directed=True)

#Calculate the histogram for the betweenness centrality values
betweeness_centrality_values_histogram = igraph.Histogram(
    bin_width=10, data=betweeness_centrality_values)
# print 'Distribution of betweenness centrality values among nodes\n',betweeness_centrality_values_histogram
Example #37
def generate_projection_image(V, E, V_lon, E_lon, A, n, out_file, g_min=None):
    E_probs = get_edges_probs(E)
    E = set(E.keys())

    print("projection nodes in LON", len(V & V_lon), " / ", len(V) )
    print("projection edges in LON", len(E & set(E_lon.keys())), " / ", len(E))

    results = sorted([path_length(s, A, n) for s in V])  # ascending
    if not g_min:
        g_min = results[0]

    if len(results) > 400:
        threshold = results[400]  # max number of nodes
    else:
        threshold = results[-1]

    V = {s for s in V if path_length(s, A, n) <= threshold}  # s - path
    V_ = {s: (i, path_length(s, A, n)) for i, s in enumerate(V)}

    Not_Sinks = set()
    for s in V:  # TODO optimal?
        for (v, u) in E:
            if v == s:
                Not_Sinks.add(s)
                break

    V_c = np.zeros(len(V_), dtype=object)
    for s, (i, r) in V_.items():
        V_c[i] = (s, r)
    E_ = [(V_[s1][0], V_[s2][0]) for s1, s2 in E if s1 in V_ and s2 in V_]
    E_size = [5 * E_probs[s1, s2] for s1, s2 in E if s1 in V_ and s2 in V_]  # TODO
    pos_glob = find_pos_glob(V_, E, g_min, A, n)

    g = igraph.Graph(directed=True)
    g.add_vertices(len(V_))
    g.add_edges(E_)

    visual_style = {}
    visual_style["layout"] = \
        g.layout_fruchterman_reingold(maxiter=5000)
    visual_style["vertex_color"] = ['red' if t[0] in pos_glob and t[0] in V_lon else
                                    'pink' if t[0] in pos_glob and t[0] not in V_lon else
                                    '#87CEFA' if t[0] not in V_lon else
                                    'blue'
                                    for t in V_c]
    visual_style["vertex_frame_color"] = \
        [visual_style["vertex_color"][i] if t[0] in Not_Sinks else 'black'
         for i, t in enumerate(V_c)]
    visual_style["vertex_frame_width"] = [2 for i in V_c]
    visual_style["vertex_size"] = [10 if t[0] in Not_Sinks else 20 for t in V_c]
    visual_style["edge_color"] = ['darkgrey' if e in set(E_lon.keys()) else
                                  'lightgrey'
                                  for e in E_]
    visual_style["edge_width"] = E_size
    visual_style["bbox"] = (0, 0, 1800, 1000)

    igraph.summary(g)
    image = igraph.plot(g, **visual_style)
    image.save(out_file + '.png')
    print("image ", out_file)
Example #38
                   "To isotopy class" + "\t" + "Diagram" + "\n")

        for e in graph.es:
            if start_part in graph.vs[
                    e.source]["iso_class"] and end_part in graph.vs[
                        e.target]["iso_class"]:
                file.write(
                    str(graph.vs[e.source]["iso_class"]) + "\t" +
                    str(graph.vs[e.source]["gc"]) + "\t" +
                    str(graph.vs[e.target]["iso_class"]) + "\t" +
                    str(graph.vs[e.target]["gc"]) + "\n")
        file.close()

    print("-------------------")
    print("Full graph summary.")
    igraph.summary(graph)
    connectivity_list = []
    print("-------------------")
    #
    if not composites:
        to_delete_ids = [v.index for v in graph.vs if 'ICID' in v['iso_class']]
        graph.delete_vertices(to_delete_ids)

        print("Creating isotopy graph.")
        for e in graph.es:
            if 'ICID' not in graph.vs[
                    e.source]["iso_class"] or 'ICID' not in graph.vs[
                        e.target]["iso_class"]:
                connectivity_list.append((graph.vs[e.source]["iso_class"],
                                          graph.vs[e.target]["iso_class"]))
    else:
Example #39
def sac1(graph):
    graph = graph.as_undirected()
    results = []

    attributes = [attribute_map[x] for i, x in enumerate(attribute_map.keys())]
    weights = [1 for x in range(0, graph.ecount())]

    graph.es["weight"] = weights
    graph.vs["sim"] = attributes
    #graph.vs["community"] = []

    for k in range(0, 15):
        membership = [(x) for x in range(0, graph.vcount())]
        membership_old = copy.copy(membership)
        clustering_old = igraph.VertexClustering(graph, membership)
        #igraph.plot(clustering_old)

        print(igraph.summary(clustering_old))

        #A pass
        for k in range(0, 15):
            starting_membership = copy.copy(membership)

            for vert in range(0, len(membership)):
                mod_results = []
                q_newman_cached = {}
                community_size = len(set(membership))

                vert_old = igraph.VertexClustering(graph,
                                                   membership=membership)
                mod_old = vert_old.modularity

                for vertj in range(0, len(membership)):
                    community = membership[vertj]

                    if community not in q_newman_cached:
                        membership_copy = copy.copy(membership)
                        membership_copy[vert] = community
                        community_size_new = len(set(membership_copy))
                        comm_indices = [
                            i for i, x in enumerate(membership)
                            if x == community
                        ]
                        comm_indices_new = [
                            i for i, x in enumerate(membership_copy)
                            if x == community
                        ]

                        vert_new = igraph.VertexClustering(
                            graph, membership=membership_copy)
                        mod_new = vert_new.modularity

                        modularity_diff = mod_new - mod_old

                        #if modularity_diff > 0:
                        #print "Modularity", modularity_new, "-", modularity_old, "=", modularity_diff
                        #print "Mod       ", mod_new, "-", mod_old, "=", modularity_diff

                        sim_result_old = similarity(graph, comm_indices)
                        sim_result_new = similarity(graph, comm_indices_new)

                        #print sim_result_old, sim_result_new

                        sim_result = (sim_result_new - sim_result_old)
                        q_newman = alpha * modularity_diff + (1 - alpha) * (
                            sim_result) / (math.pow(community_size_new, 2))
                        q_newman_cached[community] = q_newman
                        result = (community, q_newman)
                        mod_results.append(result)

                filtered_results = filter(lambda mod: mod[1] > 0, mod_results)
                filtered_results = list(filtered_results)

                if len(filtered_results) > 0:
                    sorted_results = sorted(filtered_results,
                                            key=itemgetter(1),
                                            reverse=True)
                    membership[vert] = sorted_results[0][0]

            diff = reduce(
                lambda x, y: x + y,
                list(
                    map(lambda x: 1 if x[0] != x[1] else 0,
                        zip(starting_membership, membership))), 0)
            print("Membership diff of", diff)

            if starting_membership == membership:
                print("No further changes can be made")
                break

        if len(results) != 0 and results[len(results) - 1] == membership:
            print("No further improvements, finished on ", k)
            break

        previous_communities = None
        if "community" in set(graph.vertex_attributes()):
            previous_communities = {
                i: e
                for i, e in enumerate(graph.vs["community"])
            }
            #print previous_communities

        results.append(copy.copy(membership))
        optimal_membership = copy.copy(membership)

        #Rename optimal membership so it'll remove nodes, communities should be 0 to n.
        for k, x in enumerate(sorted(set(optimal_membership))):
            for l, y in enumerate(optimal_membership):
                if x == y:
                    optimal_membership[l] = k

        print(optimal_membership)
        combinations = {"sim": lambda x: sum_attributes(x)}
        graph.contract_vertices(optimal_membership, combine_attrs=combinations)

        community_dict = defaultdict(list)

        for k, x in enumerate(optimal_membership):
            community_dict[x].append(k)

        if previous_communities is None:
            community_list = [set(community_dict[l]) for l in community_dict]
        else:
            community_list = [[
                previous_communities[c] for c in community_dict[l]
            ] for l in community_dict]
            community_list = list(
                map(lambda x: [item for sublist in x for item in sublist],
                    community_list))
            print(community_list)

        graph.vs["community"] = community_list
        graph.simplify(combine_edges=dict(weight="sum"),
                       multiple=True,
                       loops=False)

    return graph.vs["community"]
edges = list()
vertices = set()

for result in result1:
    vertices.add(str(result["leuven"]["name"]))
    vertices.add(str(result["brugge"]["name"]))
    vertices.add(str(result["bst"]["name"]))
    vertices.add(str(result["lst"]["name"]))
    vertices.add(str(result["ls"]["name"]))
    vertices.add(str(result["bs"]["name"]))
    edges.append([str(result["leuven"]["name"]), str(result["brugge"]["name"])])

g1.add_vertices(list(vertices))
g1.add_edges(edges)  

ig.summary(g1)
g1.is_weighted()

# Get the attributes and the number of nodes
print(g1.vs.attribute_names())
print(g1.vcount())

# Get the attributes and the number of edges
print(g1.es.attribute_names())
print(g1.ecount())              
                      
# Calculate layout
layout_fr = g1.layout("fr")

# Define style from network plotting
visual_style = {}
Example #44
# ==============================
# Author: Eugene Seo
# Date: 10.21.2016
# Description: CS519 Homework 1
# ==============================
import pandas
hs = pandas.read_csv("hsmetnet.txt", sep="\t", names=["v_left", "v_right"])

from igraph import *
from igraph import summary
meta_graph = Graph.TupleList(hs.values.tolist(), directed=True)
summary(meta_graph)

from collections import defaultdict
metabolite_set = set()
reaction_set = set()
metabolite_degree = defaultdict(int)
metabolite_idx = []
for v in meta_graph.vs:
    if 'REACTION' in v['name']:
        reaction_set.add(v)
        continue
    else:
        metabolite_degree[v['name']] = v.degree()
        metabolite_idx.append(v.index)
        metabolite_set.add(v)

print "A. number of distinct metabolities:", len(metabolite_set)
print "A. number of distinct reactions:", len(reaction_set)
print "A. number of edges:", hs.shape[0]
Example #45
def read_edges(f_name):
    print(f_name)
    g = ig.Graph.Read_Ncol(f_name, names=True, directed=False)
    ig.summary(g)
    return g
Example #46

def gen_random_paths(count):
    total = 0

    for i in range(count):
        t = gen_random_path()
        total += t
        print(i, t)

    print()
    print('avg', total / count, 'seconds')

def gen_random_weighed_paths(count):
    total = 0

    for i in range(count):
        t, skips = gen_random_weighted_path4()
        total += t
        print(i, t, skips)

    print()
    print('avg', total / count, 'seconds')


#gen_random_paths(1000)
ig.summary(G)
gen_random_weighed_paths(200)
ig.summary(G)
print('is weighted', G.is_weighted())
            i = i - 1
        #east
        i = j + 1
        while i < len(rowlist) and rowlist[i][0] - rowlist[j][0] < detectrange:
            #checkcontact_minute(i,j,rowlist,cg,row)
            checkcontact(i, j, rowlist, cg, row, radius, exposurelimit)
            #checkcontact_merge(i,j,rowlist,cg,row,table)
            i = i + 1


print("Loading matrix.npy")
table = np.load("matrix.npy")

radius = 50
exposurelimit = 3

cg = igraph.Graph()

cg.add_vertices(table.shape[1])
print("Building Graph")
t1 = time.time()
for x in range(table.shape[0]):
    processrow(table, x, cg, radius, exposurelimit)
print("Time: ", time.time() - t1)
name = "contactgraph" + str(radius) + "." + str(exposurelimit) + ".zip"
cg.write_graphmlz(name)

#cg = cg.Read_GraphMLz("contactgraph10.5.zip")

igraph.summary(cg)
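
# A brief follow-up sketch (assuming cg was built as above): see how the
# contact graph decomposes into connected components.
components = cg.components()
print("Number of components:", len(components.sizes()))
print("Largest component size:", max(components.sizes()))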
import igraph
import csv


import sys

if __name__=='__main__':

	input_file=sys.argv[1]
	output_file=sys.argv[2]
	g=igraph.Graph.Read_Ncol(input_file, directed=True)
	igraph.summary(g)  # prints the summary; g.summary() would return it as a string instead
	estimate = g.constraint()

	with open(output_file, 'wb') as fout:
		outcsv = csv.writer(fout)
		for v in g.vs:
			outcsv.writerow([v["name"], estimate[v.index]])

Beispiel #49
0
# In[1]:

import pandas
edge_list_ecoli = pandas.read_csv("ecolitfnet.txt",
                                  sep="\t",
                                  names=["source", "target"])
edge_list_ecoli.head(n=6)

# Step 2. Make an igraph directed graph from the network; print a graph summary as a sanity check.

# In[2]:

from igraph import Graph
from igraph import summary
ecoli_graph = Graph.TupleList(edge_list_ecoli.values.tolist(), directed=True)
summary(ecoli_graph)

# Q1. Which of the connected 3-vertex motifs is most frequent in the E. coli regulatory network?

# In[3]:

import numpy as np
three_vertex_motifs_counts = ecoli_graph.motifs_randesu(size=3)
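# motifs_randesu() returns one count per isomorphism class of 3-vertex graphs;
# unconnected classes are reported as NaN, hence nanargmax below.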
print np.nanargmax(three_vertex_motifs_counts)

# Q2. Which of these motifs has a count of 47 in the regulatory network? (FFL)

# In[4]:

print three_vertex_motifs_counts.index(47)
Beispiel #50
0
if os.path.isfile('edgelist_%s.txt' % KEYWORD): os.remove('edgelist_%s.txt' % KEYWORD)
if os.path.isfile('nodes_%s.txt' % KEYWORD): os.remove('nodes_%s.txt' % KEYWORD)

os.system('make edgelist_%s.txt' % KEYWORD)
os.system('make nodes_%s.txt' % KEYWORD)

import igraph
g = igraph.Graph.Read_Ncol('edgelist_%s.txt' % KEYWORD, directed=False)

mapping = {}
with open('nodes_%s.txt' % KEYWORD) as f:
	for row in f.read().splitlines():
		mapping[row.split()[0]] = row.decode('utf-8', 'ignore').encode('ascii', 'ignore')

# Read_Edgelist
igraph.summary(g)
g.simplify()
igraph.summary(g)
# http://stackoverflow.com/questions/9471906/what-are-the-differences-between-community-detection-algorithms-in-igraph

if ALG == "infomap":
	comms = g.community_infomap() # 12
elif ALG == "fastgreedy":
	comms = g.community_fastgreedy().as_clustering() # CNM 4
elif ALG == "multilevel":
	comms = g.community_multilevel() # louvain, 5

# comms = g.community_edge_betweenness(directed=False).as_clustering() # TOO SLOW
# comms = g.community_label_propagation()  # too few communities?
# comms = g.community_leading_eigenvector() # 5
# comms = g.community_optimal_modularity() # too slow?
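
# A minimal follow-up sketch (assuming one of the branches above set comms):
# report the number of communities found and the modularity of the partition.
print len(comms), "communities, modularity =", comms.modularity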
    def generateGraph(self, netLogoWorld, fileNameOut, nodeSizeMultiplyer, coordMultiplyer, edgeWeightMultiplyer, edgeWeightIgnore, nodeNamePrefix):
        self.netLogoWorld = netLogoWorld;
        self.fileNameOut = fileNameOut;
        print("Generating graph started ...");
        print("nodeSizeMultiplyer=%f, coordMultiplyer=%f, edgeWeightMultiplyer=%f, edgeWeightIgnore=%s, , nodeNamePrefix='%s'" \
              % (nodeSizeMultiplyer, coordMultiplyer, edgeWeightMultiplyer, edgeWeightIgnore, nodeNamePrefix))
        print "Igraph version %s" % (igraph.__version__);
        self.graph = igraph.Graph();

        # populating graph nodes from turtles
        self.graph.add_vertices(len(self.netLogoWorld.turtles));
        turtle = Turtle();
        i = 0;
        for turtle in self.netLogoWorld.turtles:
            #print("Turtle: who=%d, label=%s" %(turtle.who, turtle.label))
            # We cannot use id:
            #    self.graph.vs[i]['id'] = turtle.who;
            # It was necessary to add a 'name' attribute so that edges can be referred
            # to by name when they are added later. That is the only reliable way, since
            # vertex ids (turtle WHOs) do not necessarily start from 0, and igraph
            # insists on 0-based, non-sparse vertex ids.
            self.graph.vs[i]['name'] = str(turtle.who);
            self.graph.vs[i]['size'] = turtle.size * nodeSizeMultiplyer;
            rgbColor = NetLogoWorld.colorNetlotoToRgb(turtle.color);
            self.graph.vs[i]['r'] = rgbColor[0];
            self.graph.vs[i]['g'] = rgbColor[1];
            self.graph.vs[i]['b'] = rgbColor[2];
            self.graph.vs[i]['x'] = turtle.xcor * coordMultiplyer;
            self.graph.vs[i]['y'] = turtle.ycor * coordMultiplyer;
            if(turtle.label == None or turtle.label == ""):
                self.graph.vs[i]['label'] = "%s%d" % (nodeNamePrefix, turtle.who);
            else:
                self.graph.vs[i]['label'] = turtle.label;

            #self.graph.vs[i]['hophop'] = 'YESSS!!!';

            # adding additional non-recognized columns
            #print "keys:%s " %(turtle.additionalParams.keys());
            print(turtle.additionalParams.keys())
            for columnName in turtle.additionalParams.keys():
                columnType = turtle.columnTypes[columnName];
                print("columnName=%s, columnType=%s, value=%s" %(columnName, columnType, turtle.additionalParams[columnName]));
                
                if(columnType == "string"):
                    self.graph.vs[i][columnName] = str(turtle.additionalParams[columnName]);
                elif(columnType == "int"):
                    self.graph.vs[i][columnName] = int(turtle.additionalParams[columnName]);
                elif(columnType == "float"):
                    self.graph.vs[i][columnName] = float(turtle.additionalParams[columnName]);
            i = i + 1;

        # populating edges nodes from links
        link = Link();
        i = 0;
        for link in self.netLogoWorld.links:            
            #print("link.end1 = %s, link.end2=%s" % (str(link.end1), str(link.end2)));
            #print self.graph;
            #print self.graph.get_edgelist();
            # We cannot add edges by integers:
            #    self.graph.add_edges([(link.end1, link.end2)]);
            # because integers are interpreted as igraph's vertex IDs, which need not
            # match NetLogo turtle WHOs (if the WHOs do not start from 0).
            # Therefore we refer to vertices by name, which means passing strings
            # instead of integers to .add_edges().
            self.graph.add_edges([(str(link.end1), str(link.end2))]);
            self.graph.es[i]['Edge Id'] = link.end1 * 1000 + link.end2;
            if(link.label == None or link.label == ""):
                self.graph.es[i]['Edge Label'] = self.graph.es[i]['label'] = "%d-%d" % (link.end1, link.end2);
            else:
                self.graph.es[i]['Edge Label'] = self.graph.es[i]['label'] = link.label;
            if(not edgeWeightIgnore):
                self.graph.es[i]['weight'] = link.thickness*edgeWeightMultiplyer;
            
            print(link.additionalParams.keys())
            #print link.additionalParams.keys();
            for columnName in link.additionalParams.keys():
                columnType = link.columnTypes[columnName];
                print("columnName=%s, columnType=%s, value=%s" %(columnName, columnType, link.additionalParams[columnName]));
                
                if(columnType == "string"):
                    self.graph.es[i][columnName] = str(link.additionalParams[columnName]);
                elif(columnType == "int"):
                    self.graph.es[i][columnName] = int(link.additionalParams[columnName]);
                elif(columnType == "float"):
                    self.graph.es[i][columnName] = float(link.additionalParams[columnName]);
            i = i + 1;

        igraph.summary(self.graph);
        self.graph.write_graphml(fileNameOut);
        print("Generating graph finished ...");
	def iGraph(self, debug = False, mode = "authorities", output = "ehri.graphml", direct = False):  # Let's create the graph
		#We need its number of nodes first
		graphLength = len(set(self.index["items"]))

		if mode == "authorities":
			graphLength += len(self.index["authorities"])

		#Just checking
		if debug == True:
			print "Graph vertices : " + str(graphLength)

		#Create its instance
		g = igraph.Graph(graphLength, directed = direct)

		#Now we needs names of stuff, lets call it labels
		labels = [item for item in self.index["items"]]

		#We are still missing the authority labels, aren't we?
		if mode == "authorities":
			labels += self.index["authorities"]

		#Have we got same number than graphLength ?
		if debug==True:
			print "Labels length " + str(len(labels))

		#Just to be sure :
		if len(labels) != graphLength:
			print "Not the same number of names and labels you fool"
			print "So you shall not pass"
			sys.exit()
		
		#We create another thing : we save index of items and labels in a dictionary, because that's why
		index = {}
		for name in labels:
			index[name] = len(index)
		#Isn't it beautiful ?

		#So now, we can add labels to our graph 
		g.vs["label"] = labels

		#Would be nice to connect it...
		#Hello EDGES
		edges = []
		weight = []
		for i in self.index["items"]:
			for a in self.index["items"][i]:
				if mode == "cluster":
					edges.append((index[i], index[a[0]]))
					weight.append(a[1])
				else:
					edges.append((index[i], index[a]))
			
		g.add_edges(edges)
		if mode == "cluster":
			g.es["weight"] = weight

		#A little sum-up ?
		if debug == True:
			igraph.summary(g)
		
		try:
			if mode != "cluster":
				#Let's try to make some community out of it...
				d = g.community_fastgreedy()
				cl = d.as_clustering()
				#Let's save this clusterization into an attribute
				g.vs["fastgreedy"] = cl.membership
				#Spin glass not possible
		except:
			print "Fast greedy not working. Multi edges graph ?"


		#And do that with other clusterization modules
		d = g.community_walktrap()
		cl = d.as_clustering()
		#Let's save this clusterization into an attribute
		g.vs["walktrap"] = cl.membership

		g.save(output)
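
		# A brief sketch of re-loading the saved graph elsewhere (assuming the
		# default output name): g = igraph.Graph.Read_GraphML("ehri.graphml")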
def BuildGraph(start_date, end_date,
               site_info = None, tags = [],
               start_page = 1,
               graph = None):
    """ Builds a graph from data from StackExchange with the specified
        site and tags, between the start and end date. The graphs's
        vertices are tags and edges are placed when two tags appear on
        the same question.
        Can also add to an existing graph. This and the start_page argument
        are useful if quota limits prevented grabbing all the data """

    if not graph:
        # Create a new graph
        graph = igraph.Graph(directed = False)
    graph.es['weight'] = 1.0 # make the graph weighted

    if not site_info:
        site_info = {'site': 'stackoverflow',
                     'base_url': 'http://api.stackexchange.com',
                     'sleep_time': 0.035} # seconds, since limited to 30 requests/s

    # Keep track of how much we've grabbed from StackExchange
    # and if we've grabbed everything
    quota_remaining = 10000
    has_more = True
    page = start_page
    count = 0

    while quota_remaining > 0 and has_more:
        # Grab a bunch of questions
        url = GetQuestionRequestString(page, start_date, end_date,
                                       tags, site_info)
        r = requests.get(url)
        data = r.json()

        # handle throttling
        # the API can tell us to back off, otherwise use a default time
        sleep_time = float(data.get('backoff', site_info['sleep_time']))
        time.sleep(sleep_time) # for throttling, honoring any requested backoff

        # process the questions and add to graph
        questions = data.get('items', []) # empty list if no questions returned
        for question in questions:
            AddQuestionToGraph(question, graph)
            count += 1

        # update for next iteration
        if quota_remaining > 0:
            page += 1
        has_more = data['has_more']
        quota_remaining = data['quota_remaining']

    print "Processed %i pages and %i questions."%(page, count)
    if quota_remaining == 0:
        print "   but ran out of quota."
    else:
        print "   (%i quota left)"%(quota_remaining)
    print "Graph summary:"
    igraph.summary(graph)

    return graph
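
# A minimal usage sketch (the epoch timestamps and tags below are purely
# illustrative; GetQuestionRequestString and AddQuestionToGraph are assumed to
# be defined alongside this function):
# tag_graph = BuildGraph(start_date=1420070400, end_date=1422748800,
#                        tags=['python', 'igraph'])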