def build_graph(connList, vertexNames, outfile=None):
    """Turn a connectivity list and vertex names into an iGraph object.

    The connectivity list must be a pickle containing an array with three
    columns: source, target and edge weight. The edge weight is the number
    of relations between source and target; this weight is referred to as
    the rel_weight (as opposed to the deg_weight).

    The vertexNames argument is also a pickle, containing a dictionary in
    which every URI is a key whose value is the corresponding vertex id
    (just an integer). Note that the vertex ids are NOT preserved during
    conversion: iGraph creates new ids on the fly (numbering vertices in
    the order they appear in the edge list).

    Args:
        connList: filename of the pickle file containing a matrix, each row
            containing source, target, weight
        vertexNames: filename of a pickle containing a dictionary with URIs
            as keys and vertex ids as values
        outfile: filename where a GraphML version of the graph will be stored.
            If outfile=None, the graph object is only returned.

    Returns:
        The iGraph graph object; also stores a GraphML file if outfile is set.
    """
    print('(1/6) Starting conversion, could take a while...')
    connList = pickle.load(open(connList, 'rb'))
    connArr = np.array(connList, dtype='int')
    del connList
    vertexNames = pickle.load(open(vertexNames, 'rb'))
    print('(2/6) Loading of pickle files completed.')
    edges = [{'source': s, 'target': t, 'rel_weight': w} for s, t, w in connArr]
    vertices = [{'orig_id': int(id), 'uri': uri} for uri, id in vertexNames.items()]
    print('(3/6) Cleaning edges and vertices completed')
    graph = ig.Graph.DictList(vertices, edges, vertex_name_attr='orig_id')
    del graph.es['source']
    del graph.es['target']
    print('(4/6) iGraph object created')
    degrees = graph.degree()
    for e in graph.es:
        e['deg_weight'] = np.log(degrees[e.source]) + np.log(degrees[e.target])
    print('(5/6) Finished calculating degree based weights')
    print('Summary of the graph:')
    ig.summary(graph)
    if outfile:
        graph.write_graphml(outfile)
        print('(6/6) GraphML file "' + outfile + '" saved; finished!')
    else:
        print('(6/6) Finished!')
    return graph
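# A minimal usage sketch for build_graph (not from the original module): the
# file names and URIs below are hypothetical, and the sketch assumes the
# module-level imports used above (pickle, numpy as np, igraph as ig).
import pickle

conn = [(0, 1, 3), (1, 2, 1), (2, 0, 2)]          # (source, target, rel_weight)
names = {'http://example.org/a': 0,
         'http://example.org/b': 1,
         'http://example.org/c': 2}

with open('toy_conn.pickle', 'wb') as fh:
    pickle.dump(conn, fh)
with open('toy_names.pickle', 'wb') as fh:
    pickle.dump(names, fh)

toy_graph = build_graph('toy_conn.pickle', 'toy_names.pickle', outfile='toy.graphml')
print(toy_graph.vs['uri'])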
def vizsgalat(net):
    N = net.vcount()
    igraph.summary(net)
    cc = net.components()
    comp = len(cc.sizes())
    elek = net.ecount()
    if comp == 1 and elek == N - 1:
        kod = pruf(net)
        print "This is a tree!"   # originally: "Ez egy fa!"
    else:
        kod = None
    print kod
    print ""
    print ""
    return kod
def testLoadSaveGraph(self, fileNameIn, fileNameOut):
    print("testLoadSaveGraph started ...")
    print "Igraph version %s" % (igraph.__version__)
    self.graph = igraph.Graph.Read_GraphML(fileNameIn)
    print("node parameters(%s): %s" % ('id', self.graph.vs['id']))
    print("node parameters(%s): %s" % ('label', self.graph.vs['label']))
    print("edge parameters(%s): %s" % ('Edge Id', self.graph.es['Edge Id']))
    print("edge parameters(%s): %s" % ('Edge Label', self.graph.es['Edge Label']))
    print("edge parameters(%s): %s" % ('weight', self.graph.es['weight']))
    print("edge parameters(edge %d): %s" % (0, self.graph.es[0]))
    # fixing edge label
    self.graph.es['label'] = self.graph.es['Edge Label']
    igraph.summary(self.graph)
    self.graph.write_graphml(fileNameOut)
    print("testLoadSaveGraph finished ...")
def contactsGraph():
    fileName = path + "connections-28-11-12"
    vertexIdDict = {}
    vertexIdSet = set([])
    edgeSet = set([])
    edgeArray = []
    graph = igraph.Graph()
    i = 0
    j = 0

    with open(fileName) as f:
        f.readline()
        for line in f:
            if i % 50000 == 0:
                print(i)
            words = line.split()
            vId1 = int(words[0])
            vId2 = int(words[1])
            if vId1 not in vertexIdSet:
                vertexIdDict[vId1] = j
                vertexIdSet.add(vId1)
                j += 1
            if vId2 not in vertexIdSet:
                vertexIdDict[vId2] = j
                vertexIdSet.add(vId2)
                j += 1
            if (vertexIdDict[vId1], vertexIdDict[vId2]) not in edgeSet and (vertexIdDict[vId2], vertexIdDict[vId1]) not in edgeSet:
                edgeArray.append([vertexIdDict[vId1], vertexIdDict[vId2]])
                edgeSet.add((vertexIdDict[vId1], vertexIdDict[vId2]))
            i += 1

    print("Read " + str(i) + " lines with " + str(j) + " vertices")
    graph.add_vertices(j)
    graph.add_edges(edgeArray)
    print(igraph.summary(graph))

    graphStats = GraphStatistics()
    statsArray = graphStats.scalarStatistics(graph, slowStats=False)
    print(graphStats.strScalarStatsArray(statsArray))

    xs, ys = zip(*[(left, count) for left, _, count in graph.degree_distribution().bins()])
    plt.figure(0)
    plt.bar(xs[0:30], ys[0:30])
    plt.xlabel("Degree")

    xs, ys = zip(*[(left, count) for left, _, count in graph.components().size_histogram().bins()])
    plt.figure(1)
    plt.bar(xs[0:30], ys[0:30])
    plt.xlabel("Component size")
    plt.show()
def fullCoauthorGraph():
    fileName = path + "coauthorsGraph"
    graph = igraph.Graph()
    graph = graph.Read_Edgelist(fileName)
    graph = graph.as_undirected()
    print(igraph.summary(graph))

    graphStats = GraphStatistics()
    statsArray = graphStats.scalarStatistics(graph, slowStats=False)
    print(graphStats.strScalarStatsArray(statsArray))
def make_projection(graph, atts): """ makes bipartite projections, returns seller projection""" # PREPARE EDGE ATTRIBUTES graph.es['val'] = list(atts['vals']) graph.es['hs'] = list(atts['hs']) graph.es['dest'] = list(atts['dest']) graph.es['hss'] = list(atts['hss']) graph.es['dest_source'] = list(atts['dest_source']) graph.es['imp_name'] = list(atts['imp_name']) # PREPARE VERTEX ATTRIBUTES # The strength member function sums all of the edge values graph.vs['val'] = graph.strength(graph.vs, weights='val') # Get list of exporters who sell to the US us_list = what_sellers(graph.es, 'USA') graph.vs['US'] = 0 graph.vs[us_list]['US'] = 1 # Get list of exporters who sell to a seleted foreign coutnry us_list = what_sellers(graph.es, 'VEN') graph.vs['VEN'] = 0 graph.vs[us_list]['VEN'] = 1 # Get most frequent hs by exporter hs_tup = source_hs(graph.es,'hss') graph.vs['hs_source'] = 0 graph.vs[hs_tup[0]]['hs_source'] = hs_tup[1] # Get most frequent destimation dest_tup = source_hs(graph.es,'dest_source') graph.vs['dest_source'] = 0 graph.vs[dest_tup[0]]['dest_source'] = dest_tup[1] # SIZES FROM graph.csv size = 10046 edge_size = 58031 big_size = 40789 sub = size # MAKE THE TWO TYPES (SELLER AND BUYER) graph.vs['type'] = [1] * big_size graph.vs[sub:]['type'] = [0] * (big_size - sub) # PROEJECT AND ADD ATTRIBUTES proj2, proj1 = graph.bipartite_projection() proj1.vs['val'] = graph.vs[0:sub]['val'] proj1.vs['val'] = graph.vs[0:sub]['val'] # Get most valuable importer max_imp = pd.read_pickle('max_imp.pickle') proj1.vs['imp_name'] = max_imp # WRITE AND READ proj1.write_pickle('proj1.pickle') proj1 = ig.read('proj1.pickle') print(ig.summary(proj1)) return proj1, proj2
def testCreateSaveGraph(self, fileNameOut):
    print("testCreateSaveGraph started ...")
    print "Igraph version %s" % (igraph.__version__)
    self.graph = igraph.Graph()
    self.graph.add_vertices(3)
    self.graph.add_edges([(0, 1), (1, 2)])
    self.graph.vs['id'] = [5, 7, 9]
    self.graph.vs['size'] = [50, 30, 40]
    self.graph.vs['r'] = [255, 255, 0]
    self.graph.vs['g'] = [0, 0, 0]
    self.graph.vs['b'] = [0, 0, 255]
    self.graph.vs['x'] = [0, 100, 100]
    self.graph.vs['y'] = [0, 0, 100]
    self.graph.vs['label'] = ["Nada", "Zhenia", "Sasha"]
    self.graph.es['Edge Id'] = [57, 79]
    self.graph.es['Edge Label'] = ['Nada-Zhenia', 'Zhenia-Sasha']
    self.graph.es['label'] = ['Nada-Zhenia-l', 'Zhenia-Sasha-l']
    self.graph.es['weight'] = [1, 5]
    igraph.summary(self.graph)
    self.graph.write_graphml(fileNameOut)
    print("testCreateSaveGraph finished ...")
def articleGroupsGraph():
    fileName = path + "articleGroupMembership-28-11-12"
    graph = readBipartiteGraph(fileName)
    print(igraph.summary(graph))

    graphStats = GraphStatistics()
    statsArray = graphStats.scalarStatistics(graph, slowStats=False)
    print(graphStats.strScalarStatsArray(statsArray))

    xs, ys = zip(*[(left, count) for left, _, count in graph.degree_distribution().bins()])
    plt.figure(0)
    plt.bar(xs[0:30], ys[0:30])
    plt.xlabel("Degree")

    xs, ys = zip(*[(left, count) for left, _, count in graph.components().size_histogram().bins()])
    plt.figure(1)
    plt.bar(xs[0:30], ys[0:30])
    plt.xlabel("Component size")
    plt.show()
#!/usr/bin/env python
import igraph as ig

g = ig.Graph.Read_Ncol("data/200_edges_no_dups.ncol", directed=False)
ig.summary(g)

layout = g.layout("kk")
style = dict()
style["vertex_size"] = 10
style["edge_width"] = [1 + w / 1000 for w in g.es["weight"]]
style["layout"] = layout
ig.plot(g, **style)
import igraph
import numpy as np

matrix = [[0, 1, 2], [1, 0, 0], [2, 0, 0]]
g = igraph.Graph.Adjacency(matrix)
print g.get_edgelist()
igraph.summary(g)
print g.get_edgelist()[0]
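# Related sketch (standard igraph API, not part of the example above):
# Graph.Adjacency treats the matrix entry 2 as two parallel edges, so if the
# matrix is meant to carry weights, Weighted_Adjacency keeps them as an edge
# attribute instead.
import igraph

m = [[0, 1, 2], [1, 0, 0], [2, 0, 0]]
wg = igraph.Graph.Weighted_Adjacency(m, mode="undirected", attr="weight")
print(wg.get_edgelist())   # expected: [(0, 1), (0, 2)]
print(wg.es["weight"])     # expected: [1.0, 2.0]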
from igraph import Graph, summary
from igraph import plot as iplot  # imported as iplot so it does not clash with pylab's plot function

# In a program file this line would also be needed instead of the -pylab option:
from pylab import plot, average, array, grid, xlabel, ylabel, legend, show
# or simply: from pylab import *
# and then a show() call would be needed after every plot call.
import pylab  # in that case the pylab functions are called as pylab.function()

nw = Graph.Erdos_Renyi(1000, .001)
summary(nw)
M = nw.ecount()
N = nw.vcount()
Mmax = N * (N - 1) / 2
Mmax
M / Mmax
p = 1. * M / Mmax
nw.diameter()
nw.components()
cc = nw.components()  # connected components
ccs = cc.sizes()
max(ccs)
average(ccs)
from igraph import Graph, summary
from igraph import plot as iplot  # imported as iplot so it does not clash with pylab's plot function

# In a program file this line would also be needed instead of the -pylab option:
from pylab import plot, average, array, grid, xlabel, ylabel, legend, show
# or simply: from pylab import *
# and then a show() call would be needed after every plot call.
import pylab  # in that case the pylab functions are called as pylab.function()

net = Graph.Erdos_Renyi(1000, .001)
summary(net)
M = net.ecount()
N = net.vcount()
Mmax = N * (N - 1) / 2
Mmax
M / Mmax
p = 1. * M / Mmax
net.diameter()
net.components()
cc = net.components()  # connected components
ccs = cc.sizes()
max(ccs)
average(ccs)
###############################################################
# Q 11
# from Q6 get GCC vertex
import os
os.chdir('/Users/Ray/Desktop/ECE232_HW5/2/dataset/preprocess')

import igraph as ig
g = ig.Graph.Read(f='preprocessed.txt', format='ncol', directed=False)
gcc = g.components().giant()
ig.summary(gcc)

vertexID_list = []
for i in range(len(gcc.vs)):
    vertexID_list.append(int(gcc.vs[i]['name']))

import json
import pandas as pd
import numpy as np
os.chdir('/Users/Ray/Desktop/ECE232_HW5')

with open('san_francisco_censustracts.json') as f:
    geoBound = json.load(f)

# use the mean coordinate to represent the node
cood_mean_arr = np.zeros((len(geoBound['features']), 2))
for i in range(len(geoBound['features'])):
    cood_mean_arr[i, :] = np.mean(
        geoBound['features'][i]['geometry']['coordinates'][0][0], axis=0)

import matplotlib.pyplot as plt
from scipy.spatial import Delaunay

cood_mean_arr_gcc = cood_mean_arr[[x - 1 for x in vertexID_list], :]  # fit to cood_mean
#igraph.summary(c.Graph)
#c.make_Graph(15, 200, prob = 0.1)
#igraph.summary(c.Graph)
#print len(c.clique_list)
#g = c.Graph.copy()
#d = DataPolishing(g)
#igraph.summary(d.Graph)
#print len(d.Graph.maximal_cliques(min = 3))
#igraph.write(d.Graph, "randam_clique_5000.gml")
#d.data_polish(polish_ratio = pr)
#igraph.summary(d.Graph)
#print len(d.Graph.maximal_cliques(min = 3))
#igraph.write(d.Graph, "polished_clique_5000.gml")
#print "recall = " , c.recall(d.Graph)
#print "precision = " , c.precision(d.Graph)
#print "accuracy = " , c.accuracy(d.Graph)

g = igraph.read("twitter_graph.gml")
a = DataPolishing(g)

print "original:"
igraph.summary(a.Graph)
print len(a.Graph.maximal_cliques(min=3))

a.data_polish(polish_ratio=pr)

print "polished"
igraph.summary(a.Graph)
print len(a.Graph.maximal_cliques(min=3))
igraph.write(a.Graph, "polished_twitter_grapht.gml")
    return dict(rem_deg)


edgefiles = infile
f = open(outfile, 'w')
for fil in gb.glob(edgefiles):
    # Read the edge list; change the weights and directed parameters for the network of study
    p = ig.Graph.Read_Ncol(fil, weights=weight, directed=False, names=True)
    # number of nodes
    num_nodes = p.vcount()
    ig.summary(p)
    dg_dis = p.degree()
    dg_dis_count = []
    for i in set(dg_dis):
        dg_dis_count.append((i, dg_dis.count(i)))
    edg = entropy_dg(dict(dg_dis_count))
    print('\nEntropy of degree distribution for the given network is ', edg)
    a = rem_deg_dist(dict(dg_dis_count))
    erdg = entropy_rdg(a)
    print('\nEntropy of remaining degree distribution for the given network is ', erdg, '\n')
    f.write(fil + '\t' + str(erdg) + '\n')
def main(): node_limit, detection, weight_threshold, density, measure, start_date, end_date, theme, plot_network = get_params() experiment_parameters = (get_params()) print("Sample limit: {}".format(node_limit)) print("Community detection: {}".format(detection)) print("Edge weight threshold: {}".format(weight_threshold)) # %% Read data path = 'resources/votos_31-01-2019_to_30-12-2020.csv' df = pd.read_csv(path) basename = ntpath.basename(path) print(basename) random.seed(0) if theme is not None: df = filter_by_theme(df, theme, start_date, end_date) df, reps = filter_by_name_and_quantity(df, node_limit) rep_to_ind = {reps[i]: i for i in range(len(reps))} motions = df['idVotacao'].unique() motion_to_ind = {motions[i]: i for i in range(len(motions))} parties = [p for p in df['deputado_siglaPartido'].unique() if pd.notna(p)] edges = [] vote_matrix = np.zeros((len(reps), len(motions))) df_grouped = df.groupby(['idVotacao', 'deputado_nome']) for group, df_group in df_grouped: voto = df_group['voto'].values[0] i = rep_to_ind[group[1]] j = motion_to_ind[group[0]] if voto == "Sim": vote_matrix[i,j] = 1 if voto == "Não": vote_matrix[i,j] = -1 if measure == 'generalized': M = generalized_similarity(vote_matrix) elif measure == 'pearson': M = pearson_correlation(vote_matrix) else: raise NotImplementedError for dep1, dep2 in combinations(range(len(reps)), 2): if M[dep1,dep2] > 0: edges.append(((dep1,dep2), M[dep1,dep2])) #plot_similarity_distribution([e[1] for e in edges if e[1] > 0.99], weight_threshold) g = Graph(graph_attrs={'name': 'Camera dos Deputados'}, directed=False) g.add_vertices(reps) edges, weights = filter_edges(edges, num_nodes=g.vcount(), threshold=weight_threshold, density=density) g.add_edges(edges) g.es['weight'] = weights # Normalize weights to [0,1] maxw = max(g.es['weight']) minw = min(g.es['weight']) g.es['weight'] = [(e - minw) / (maxw - minw) for e in g.es['weight']] summary(g) if detection == 'leiden': communities = leidenalg.find_partition(g, leidenalg.ModularityVertexPartition, weights='weight', n_iterations=100).membership #communities = g.community_leiden(objective_function='modularity', weights='weight', n_iterations=100) elif detection == 'spinglass': communities = g.community_spinglass(weights='weight').membership elif detection == 'multilevel': communities = g.community_multilevel(weights='weight').membership elif detection == 'party': communities = groups_by_party(df, reps, parties) else: raise NotImplementedError modularity = g.modularity(communities, 'weight') print("Modularity Score: ", modularity) save_modularity(modularity, theme, start_date, end_date) g.vs['partido'] = [parties[i] for i in groups_by_party(df, reps, parties)] g.vs['url_foto'] = [df[df['deputado_nome'] == dep]['deputado_urlFoto'].values[0] for dep in g.vs['name']] g.vs['uf'] = [df[df['deputado_nome'] == dep]['deputado_siglaUf'].values[0] for dep in g.vs['name']] g.save('graphs/g.graphml') degrees, betweenness, closeness, clustering_coef = collect_metrics(g, experiment_parameters) if plot_network: period = start_date + '_to_' + end_date draw_vis(g, groups=communities, parties=parties, theme=theme, period=period, degrees=degrees, betweenness=betweenness, closeness=closeness, clustering_coef=clustering_coef)
def sac1(graph): results = [] attributes = [attribute_map[x] for i, x in enumerate(attribute_map.keys())] weights = [1 for x in range(0, graph.ecount())] graph.es["weight"] = weights graph.vs["sim"] = attributes #graph.vs["community"] = [] for k in range(0, 15): membership = [(x) for x in range(0, graph.vcount())] membership_old = copy.copy(membership) clustering_old = igraph.VertexClustering(graph, membership) #igraph.plot(clustering_old) print igraph.summary(clustering_old) #A pass for k in range(0, 15): starting_membership = copy.copy(membership) for vert in range(0, len(membership)): mod_results = [] q_newman_cached = {} community_size = len(set(membership)) vert_old = igraph.VertexClustering(graph, membership=membership) mod_old = vert_old.modularity for vertj in range(0, len(membership)): community = membership[vertj] if community not in q_newman_cached: membership_copy = copy.copy(membership) membership_copy[vert] = community community_size_new = len(set(membership_copy)) comm_indices = [i for i, x in enumerate(membership) if x == community] comm_indices_new = [i for i, x in enumerate(membership_copy) if x == community] vert_new = igraph.VertexClustering(graph, membership=membership_copy) mod_new = vert_new.modularity modularity_diff = mod_new - mod_old #if modularity_diff > 0: #print "Modularity", modularity_new, "-", modularity_old, "=", modularity_diff #print "Mod ", mod_new, "-", mod_old, "=", modularity_diff sim_result_old = simularity(graph, comm_indices) sim_result_new = simularity(graph, comm_indices_new) #print sim_result_old, sim_result_new sim_result = (sim_result_new - sim_result_old) q_newman = alpha*modularity_diff + (1-alpha)*(sim_result)/(math.pow(community_size_new, 2)) q_newman_cached[community] = q_newman result = (community, q_newman) mod_results.append(result) filtered_results = filter(lambda (c,m): m > 0, mod_results) if len(filtered_results) > 0: sorted_results = sorted(filtered_results, key=itemgetter(1), reverse=True) membership[vert] = sorted_results[0][0] diff = reduce(lambda x,y: x+y, map(lambda (x,y): 1 if x != y else 0, zip(starting_membership, membership)), 0) print "Membership diff of", diff if starting_membership == membership: print "No further changes can be made" break; if len(results) != 0 and results[len(results)-1]== membership: print "No further improvements, finished on ", k break; previous_communities = None if "community" in set(graph.vertex_attributes()): previous_communities = {i:e for i,e in enumerate(graph.vs["community"])} #print previous_communities results.append(copy.copy(membership)) optimal_membership = copy.copy(membership) #Rename optimal membership so it'll remove nodes, communities should be 0 to n. 
for k, x in enumerate(sorted(set(optimal_membership))): for l, y in enumerate(optimal_membership): if x == y: optimal_membership[l] = k print optimal_membership combinations = { "sim" : lambda x: sum_attributes(x) } graph.contract_vertices(optimal_membership, combine_attrs=combinations) community_dict = defaultdict(list) for k, x in enumerate(optimal_membership): community_dict[x].append(k) if previous_communities is None : community_list = [set(community_dict[l]) for l in community_dict] else : community_list = [[previous_communities[c] for c in community_dict[l]] for l in community_dict] community_list = map(lambda x: [item for sublist in x for item in sublist], community_list) print community_list graph.vs["community"] = community_list graph.simplify(combine_edges=dict(weight="sum"), multiple=True, loops=False) return graph.vs["community"]
if sys.platform == 'linux2':
    path = '../'

if not os.path.exists(path + "IgraphEdges"):
    # igraph requires a specific input format
    fin = open(path + 'database_' + maxDateStr + '.txt.gz')
    df = pandas.read_csv(fin, sep=",", encoding="utf8", compression='gzip')
    df["userID"] *= 2
    df["movieID"] *= 2
    df["movieID"] += 1
    df[["userID", "movieID", "rating"]].to_csv(path + "IgraphEdges", sep="\t", encoding="utf-8", header=False, index=False)

fin = path + "IgraphEdges"
print "Now let's try Igraph"

g = Graph.Read_Ncol(fin, directed=True, weights=True)  # read the graph
g.vs["type"] = [int(name) % 2 == 1 for name in g.vs["name"]]  # assign the movie or user type: 1 = movie
igraph.summary(g)

timestart = time.time()
## Not enough RAM, as expected
g.get_adjacency()
print "time to compute the adjacency matrix %d sec" % int(time.time() - timestart)

timestart = time.time()
a = g.degree_distribution(mode="in")
print a
print "time to compute inbound (~movies) degree distribution (for the bipartite graph) %d sec" % int(time.time() - timestart)

timestart = time.time()
a = g.degree_distribution(mode="out")
print a
print "time to compute outbound (~user) degree distribution (for the bipartite graph) %d sec" % int(time.time() - timestart)
#igraph.write(g, "randam_test.gml") #a = DataPolishing(g) #igraph.summary(a.Graph) #print a.Graph.maximal_cliques(min = 3) #print len(a.Graph.maximal_cliques(min = 3)) #a.data_polish(polish_ratio = pr) #print a.Graph.maximal_cliques(min = 3) #print len(a.Graph.maximal_cliques(min = 3)) #igraph.write(a.Graph, "polished_grapht.gml") #igraph.summary(a.Graph) c = Experiment(5000) igraph.summary(c.Graph) c.make_Graph(30, 100) igraph.summary(c.Graph) #print len(c.clique_list) g = c.Graph.copy() d = DataPolishing(g) igraph.summary(d.Graph) print len(d.Graph.maximal_cliques(min=3)) #igraph.write(d.Graph, "randam_clique_5000.gml") d.data_polish(polish_ratio=pr) igraph.summary(d.Graph) print len(d.Graph.maximal_cliques(min=3)) #igraph.write(d.Graph, "polished_clique_5000.gml") print "recall = ", c.recall(d.Graph) print "precision = ", c.precision(d.Graph) print "accuracy = ", c.accuracy(d.Graph)
cnt = 0
for line in f:
    ls = line[:-2].split('\t')
    #print ls
    #follower =
    #for i in xrange(1,len(ls)/2):
    follower = int(ls[0])
    for i in xrange(1, len(ls) / 2):
        followee = int(ls[2 * i])
        edges.append([followee, follower])
f.close()

g.add_edges(edges)
print igraph.summary(g)
print 'loading graph takes %f' % (time() - t_0)

t_0 = time()
evcent = g.evcent()
print 'evcent computation takes %f' % (time() - t_0)
#print type(evcent),len(evcent)

t_0 = time()
k_shell = g.shell_index()
print 'kshell computation takes %f' % (time() - t_0)

t_0 = time()
outdeg = g.outdegree()
print 'outdeg computation takes %f' % (time() - t_0)
#print type(k_shell),len(k_shell)
loss = train_batch(model, X_couples, y_labels) losses += loss if epoch % print_every == 0: logging.info( "Mean loss in Epoch [%s] with %s valid i8sequences = %s" % (epoch, valid_sequences, losses / valid_sequences)) losses, valid_sequences = 0.0, 0 if __name__ == "__main__": # g = Graph.Read_Edgelist("deepwalk/p2p-Gnutella08.edgelist") g = load_adjlist("deepwalk/karate.adjlist", directed=False) vocab_size = len(g.vs) max_len = 5 save = True sampling_table = make_sampling_table(vocab_size) degrees = np.array(g.vs.degree()) inv_sqrt_degree = 1 / np.sqrt(degrees) sampling_table = inv_sqrt_degree / np.sum(inv_sqrt_degree) logging.info("Graph Summary: \n", summary(g)) logging.info("Building Model") if save: model = pickle.load(open("out/Karate.Model.3100.pkl")) else: model = pickle.load("out/Karate.Model.3100.pkl") model = Sequential() model.add(WordContextProduct(vocab_size, proj_dim=300, init='uniform')) model.compile(loss='binary_crossentropy', optimizer='rmsprop') #couples, labels = skipgrams(sequences[np.random.randint(vocab_size)], vocab_size, window_size=4, negative_samples=1.0, sampling_table=sampling_table) #train_on_model(model, g, vocab_size, print_every=1) #pickle.dump(model, open("out/Karate.Model.3100.pkl", "wb"))
def confimation(self, graph_ig, graph_nx):
    igraph.summary(graph_ig)
    print(graph_nx.number_of_edges())
    print(graph_nx.number_of_nodes())
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import igraph as G
import numpy as np

# Build the graph
p2 = G.Graph.Read_GML("../archivos/redchica.gml")
G.summary(p2)

# Plot the graph
betweenness = p2.betweenness()
pageranks = [round(i, 3) for i in p2.pagerank()]
indegree = p2.degree(mode="in")
names = p2.vs["label"]
p2.vs["label"] = ["B: " + str(betweenness[i]) + "\nPR: " + str(pageranks[i]) + "\nInD: " + str(indegree[i]) + "\n" + names[i] for i in range(8)]
p2.es["width"] = 1
p2.vs["color"] = "lightblue"
p2.vs["size"] = 80

print sorted([(i, j) for i, j in enumerate(indegree)], key=lambda x: x[1], reverse=True)
print sorted([(i, j) for i, j in enumerate(betweenness)], key=lambda x: x[1], reverse=True)
print sorted([(i, j) for i, j in enumerate(pageranks)], key=lambda x: x[1], reverse=True)

G.plot(p2, "../img/p4-all.png", margin=50)

# plot indegree
p2.vs["label"] = ["InD: " + str(indegree[i]) + "\n" + names[i] for i in range(8)]
p2.vs['size'] = [500.0 * i / sum(indegree) for i in indegree]
p2.vs["color"] = "green"
G.plot(p2, "../img/p4-indegree.png", margin=50)

# plot betweenness
import networkx as nx
import community
import igraph
import cairocffi

#G=nx.read_gml("erdoscom.gml")
#print(nx.info(G))

iG = igraph.Graph()
iG = igraph.read("erdos.gml")
print(igraph.summary(iG))
print(iG)

#vertDendo=iG.community_fastgreedy()
#print(vertDendo.membership)

igraph.plot(iG)
"""print g.vs.select(long_lt=4)["name"] print g.vs.select(_degree = g.maxdegree())["name"] _lt = less than _eq = equals""" """g.vs["name"] = ["Alice", "Bob", "Claire", "Dennis", "Esther", "Frank", "George"] g.vs["age"] = [25, 31, 18, 47, 22, 23, 50] g.vs["gender"] = ["f", "m", "f", "m", "f", "m", "m"] g.es["is_formal"] = [False, False, True, True, True, False, True, False, False] print g.es[0] g.es[0]["is_formal"] = True print g.es[0] g.es[0]["name"] = "Sharon" print g.es[0]""" """g1 = ig.Graph.Tree(127, 2) g2 = ig.Graph.Tree(127, 2) print g2.get_edgelist() == g1.get_edgelist() ig.summary(g1)""" """tree = ig.Graph.Tree(127, 2) g=nx.Graph()## n0=0 n=24 nodeSize=500 fontSize=12 airportCodes = [1,2,3,4,5,6,7] g.add_nodes_from(airportCodes) g.add_edges_from([(1,2), (1,3), (3,2), (3,4), (4,5), (4,6), (6,5), (5,7), (6,7)]) print tree.get_edgelist() layout=G.layout("kk")
def calc_hac_communities(h5_data, adjacency_matrix, linkage_method="average", metric="correlation", plot_flag=True, threshold=None): distance_matrix = 1 - adjacency_matrix # Create condensed distance matrix # A condensed distance matrix is a flat array containing the upper triangular of the distance matrix. (SciPy) distance_array = distance_matrix[np.triu_indices_from(distance_matrix, k=1)] # Alternative to the upper version #np.fill_diagonal(distance_matrix, 0.0) #distance_matrix = np.around(distance_matrix, 7) #Attention! Round affects clustering #distance_array = squareform(distance_matrix) # Linkage can be single, complete, average, weighted # Calculate linkage matrix z = hac.linkage(distance_array, linkage_method, metric) # Creation of the actual graph hac_community_G = base_graph_structure(h5_data, adjacency_matrix) # Calculate dendrogram-cut based on modularity optimization threshold_list = [] for x in range(1, len(adjacency_matrix) + 1): memberships = hac.fcluster(z, x, criterion="maxclust") threshold_list.append(modularity_trsh(memberships, hac_community_G)) if plot_flag == True: plt.figure() plt.xticks(range(0, len(adjacency_matrix)), range(1, len(adjacency_matrix) + 1)) plt.title("modularity") plt.plot(threshold_list) plt.figure() hac.dendrogram(z) plt.show() if threshold == None: print("") print("Threshold by Modularity used!") # +1 because modularity calculation starts with 1 cluster instead of 0, but indexing starts with 0 threshold = threshold_list.index(max(threshold_list)) + 1 else: print("") print("Threshold set manually!") # Calculate Hierarchical Clustering #membership_list = hac.fclusterdata(data_matrix, threshold, criterion="maxclust", metric=metric, method=linkage_method) membership_list = hac.fcluster(z, threshold, criterion="maxclust") # Reduce each membership value by one # fcluster starts with membership number one, for transformation into ig.VertexClustering a starting membership of zero is needed membership_list = map(lambda x: x - 1, membership_list) hac_communities = ig.VertexClustering(hac_community_G, membership=membership_list) print("") print("Community Graph:") print(ig.summary(hac_community_G)) print("") print("Threshold of Dendrogramm Cut: " + str(threshold)) # Add community membership as attribute for vertex in hac_community_G.vs: vertex["membership"] = hac_communities.membership[vertex.index] print("") print("Number of Communities: " + str(len(list(hac_communities)))) # Calculate unweighted modularity modularity = hac_communities.modularity # Calculate weighted modularity # modularity = hac_community_G.modularity(hac_communities, weights=hac_community_G.es["weight"]) print("") print("Modularity: " + str(modularity)) return hac_community_G, hac_communities
# assign node names and weights to be attributes of the vertices and edges
# respectively
G.vs['label'] = vocab
G.es['weight'] = weights

# I will also assign the weights to the 'width' attribute of the edges. this
# means that igraph.plot will set the line thicknesses according to the edge
# weights
#G.es['width'] = weights

# plot the graph, just for fun (oops need to install Cairo for this)
#igraph.plot(G, layout="rt", labels=True, margin=80)

# run the greedy community detection algorithm
print ig.summary(G)
print G.get_edgelist()[1:20]
print G.vs['label'][1:20]

# quick look at the degree histogram
NUMBINS = 20
if verboseplot:
    plt.figure()
    plt.hist(G.degree(), NUMBINS)
    plt.title('degree distribution for the word co-occurrences graph')
    plt.show()

print "finding high modularity communities..."
G_simple = G.simplify()  # removes self loops and duplicate edges
word_dendrogram = G.community_fastgreedy()
print "word dendrogram " + str(word_dendrogram.merges)
df = pandas.read_csv(fin, sep=",", encoding="utf8", compression='gzip') df["userID"] *= 2 df["movieID"] *= 2 df["movieID"] += 1 df[["userID", "movieID", "rating"]].to_csv(path + "IgraphEdges", sep="\t", encoding="utf-8", header=False, index=False) fin = path + "IgraphEdges" print "Now let's try Igraph" g = Graph.Read_Ncol(fin, directed=True, weights=True) #read the graph g.vs["type"] = [int(name) % 2 == 1 for name in g.vs["name"] ] #assign the movie or user type : 1 = movie igraph.summary(g) timestart = time.time() ##Not enough RAM, as expected g.get_adjacency() print "time to compute the adjacency matrix %d sec" % int(time.time() - timestart) timestart = time.time() a = g.degree_distribution(mode="in") print a print "time to compute inbound (~movies) degree distribution (for the bipartite graph) %d sec" % int( time.time() - timestart) timestart = time.time() a = g.degree_distribution(mode="out") print a
def generate_node_edge_lists(msi_frame_1, msi_frame_2, graph_1, graph_2): edges_g1 = [(msi_frame_1.columns[e.tuple[0]], msi_frame_1.columns[e.tuple[1]]) for e in graph_1.es] edges_g2 = [(msi_frame_2.columns[e.tuple[0]], msi_frame_2.columns[e.tuple[1]]) for e in graph_2.es] nodes_g1 = [msi_frame_1.columns[v.index] for v in graph_1.vs] nodes_g2 = [msi_frame_2.columns[v.index] for v in graph_2.vs] nodes_g1_only = set(nodes_g1) - set(nodes_g2) nodes_g2_only = set(nodes_g2) - set(nodes_g1) nodes_common = set(nodes_g1) & set(nodes_g2) edges_g1_only = set(edges_g1) - set(edges_g2) edges_g2_only = set(edges_g2) - set(edges_g1) edges_common = set(edges_g1) & set(edges_g2) print(edges_g1) print(edges_g2) print(nodes_g1) print(nodes_g2) print("len symdiff edges") print(len(set(edges_g1) ^ set(edges_g2))) print("len symdiff nodes") print(len(set(nodes_g1) ^ set(nodes_g2))) print("len union edges") print(len(set(edges_g1) & set(edges_g2))) print("len union nodes") print(len(set(nodes_g1) & set(nodes_g2))) print("g1 only nodes") print(len(nodes_g1_only)) print("g2 only nodes") print(len(nodes_g2_only)) print("g1 only edges") print(len(edges_g1_only)) print("g2 only edges") print(len(edges_g2_only)) print(ig.summary(graph_1)) print(ig.summary(graph_2)) for x in nodes_g1_only: if x in nodes_g2: print("error") if x not in nodes_g1: print("error") for x in nodes_g2_only: if x in nodes_g1: print("error") if x not in nodes_g2: print("error") for x in nodes_common: if x not in nodes_g1: print("error") if x not in nodes_g2: print("error") for x in edges_g1_only: if x in edges_g2: print("error") if x not in edges_g1: print("error") for x in edges_g2_only: if x in edges_g1: print("error") if x not in edges_g2: print("error") for x in edges_common: if x not in edges_g1: print("error") if x not in edges_g2: print("error") return nodes_g1, edges_g1, nodes_g1_only, edges_g1_only, \ nodes_g2, edges_g2, nodes_g2_only, edges_g2_only, \ nodes_common, edges_common
def gen_graph_from_nol(fncol):
    g = Graph()
    g = g.Read_Ncol(fncol, names=True, weights=True, directed=True)
    print summary(g)
    return g
'edge_osmid': e_p['link_id'], 'edge_index': edge_index, 'start_node': e_p['start_node'], 'end_node': e_p['end_node'], 'sec_speed': e_p['sec_speed'], 'sec_length': e_length, 'sec_duration': e_length / e_p['sec_speed'] } nodes_in_edge_set.add(e_p['start_node']) nodes_in_edge_set.add(e_p['end_node']) edge_data.append(edge_element) edge_index += 1 ### Check if all nodes in the edge dataset are contained in the provided nodes dataset print(nodes_in_edge_set.issubset(set([*node_json]))) g = igraph.Graph.DictList(vertices=node_data, edges=edge_data, vertex_name_attr='node_osmid', edge_foreign_keys=('start_node', 'end_node'), directed=True) print(igraph.summary(g)) # print(g.vs[0]) # print(g.es.find(edge_osmid='101554764')) # route_a = g.get_shortest_paths( # g.vs.find(node_osmid='1172644728'), # g.vs.find(node_osmid='1172712808'),output="epath") # print(route_a) g.write_graphmlz('{}_{}_0509.graphmlz'.format(FOLDER, FILE)) # g = igraph.load('Collected_data_False14.graphmlz')
#igraph.write(g, "randam_test.gml") #a = DataPolishing(g) #igraph.summary(a.Graph) #print a.Graph.maximal_cliques(min = 3) #print len(a.Graph.maximal_cliques(min = 3)) #a.data_polish(polish_ratio = pr) #print a.Graph.maximal_cliques(min = 3) #print len(a.Graph.maximal_cliques(min = 3)) #igraph.write(a.Graph, "polished_grapht.gml") #igraph.summary(a.Graph) c = Experiment(5000) igraph.summary(c.Graph) c.make_Graph(30,100) igraph.summary(c.Graph) #print len(c.clique_list) g = c.Graph.copy() d = DataPolishing(g) igraph.summary(d.Graph) print len(d.Graph.maximal_cliques(min = 3)) #igraph.write(d.Graph, "randam_clique_5000.gml") d.data_polish(polish_ratio = pr) igraph.summary(d.Graph) print len(d.Graph.maximal_cliques(min = 3)) #igraph.write(d.Graph, "polished_clique_5000.gml") print "recall = " , c.recall(d.Graph) print "precision = " , c.precision(d.Graph) print "accuracy = " , c.accuracy(d.Graph)
# ============================ Q6 =============================
import igraph as ig
import json

g = ig.Graph.Read(f='dataset/edge_weight', format='ncol', directed=False)
gcc = g.components().giant()
ig.summary(g)

name_to_disname_loc = {}
with open('dataset/san_francisco_censustracts.json', 'r') as f:
    data = json.loads(f.readline())
features = data['features']
for feature in features:
    coordinates = feature['geometry']['coordinates'][0][0]
    latitude = 0
    longitude = 0
    for coordinate in coordinates:
        latitude += coordinate[0]
        longitude += coordinate[1]
    latitude /= len(coordinates)
    longitude /= len(coordinates)
    name_to_disname_loc[feature['properties']['MOVEMENT_ID']] = (
        feature['properties']['DISPLAY_NAME'], latitude, longitude)

seq = gcc.vs()
for i in seq:
    i['display name'] = name_to_disname_loc[i['name']][0]
    i['location'] = (name_to_disname_loc[i['name']][1], name_to_disname_loc[i['name']][2])
import igraph as ig

# Initiate the graph
graph = ig.Graph(1)

# Add nodes aka vertices
graph.add_vertices(2)

# Add edges
graph.add_edges([(0, 1), (1, 2)])

# Delete edges
# graph.delete_edges()

# Delete vertices
# graph.delete_vertices()

# To get the id of an edge between two nodes
# graph.get_eid(nodeID_one, nodeID_two)

graph.add_edges([(2, 0)])
graph.add_vertices(3)
graph.add_edges([(2, 3), (3, 4), (4, 5), (5, 3)])

# Draw graphs
# Deterministic generators produce the same graph if you call them with exactly the same
# parameters, while stochastic generators produce a different graph every time.

# print(graph)  # Doesn't work
print(ig.summary(graph))
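# A small sketch illustrating the deterministic vs. stochastic remark above
# (uses only standard igraph generators; nothing here comes from the original file).
import igraph as ig

t1 = ig.Graph.Tree(127, 2)                  # deterministic: same parameters -> same graph
t2 = ig.Graph.Tree(127, 2)
print(t1.get_edgelist() == t2.get_edgelist())   # True

r1 = ig.Graph.Erdos_Renyi(n=100, p=0.1)     # stochastic: a new random graph on every call
r2 = ig.Graph.Erdos_Renyi(n=100, p=0.1)
print(r1.get_edgelist() == r2.get_edgelist())   # almost always False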
import igraph as ig
import easygui
from matplotlib import pyplot as plt
import numpy as np

print(ig.__version__)

#
# TASK 1: Read Data Files
#
filename = "./USAir97.net"
print(filename)

g = ig.Graph.Read_Pajek(filename)
ig.summary(g)
g.is_weighted()

# Get the attributes and the number of nodes
print(g.vs.attribute_names())
print(g.vcount())

# Get the attributes and the number of edges
print(g.es.attribute_names())
print(g.ecount())

#
# TASK 2: Calculating Centrality Measures
#
degree = g.degree()
import igraph, pylab

g = igraph.load('social_comp.graphml')

# Graph Summary
print "Graph Summary :", igraph.summary(g)
print "Directed : ", g.is_directed()  # Directed graph
print "Weighted : ", g.is_weighted()  # Not weighted graph
print "Diameter: ", g.diameter()  # 7
print "Density: ", g.density()  # 0.000788246509705

# Determine the strongly connected components by clustering using strong connection
g_clusters = g.clusters(mode='strong')

# Check the cluster sizes
cluster_sizes = g_clusters.sizes()
max_cluster = max(cluster_sizes)

# Clusters of size 1
clusters_of_size_1 = cluster_sizes.count(1)

print 'No. of strongly connected components in the graph: ', len(g_clusters)  # 5736
print 'Length of largest strongly connected component in the graph: ', max_cluster  # 154
# print 'No. of strongly connected components with just single node in the graph: ', clusters_of_size_1
# print type(g.betweenness(directed=True))

# Calculate the betweenness centrality values for nodes
betweeness_centrality_values = g.betweenness(directed=True)

# Calculate the histogram for the betweenness centrality values
betweeness_centrality_values_histogram = igraph.Histogram(bin_width=10, data=betweeness_centrality_values)
# print 'Distribution of betweenness centrality values among nodes\n', betweeness_centrality_values_histogram
def generate_projection_image(V, E, V_lon, E_lon, A, n, out_file, g_min=None): E_probs = get_edges_probs(E) E = set(E.keys()) print("projection nodes in LON", len(V & V_lon), " / ", len(V) ) print("projection edges in LON", len(E & set(E_lon.keys())), " / ", len(E)) results = sorted([path_length(s, A, n) for s in V]) # ascending if not g_min: g_min = results[0] if len(results) > 400: threshold = results[400] # max number of nodes else: threshold = results[-1] V = {s for s in V if path_length(s, A, n) <= threshold} # s - path V_ = {s: (i, path_length(s, A, n)) for i, s in enumerate(V)} Not_Sinks = set() for s in V: # TODO optimal? for (v, u) in E: if v == s: Not_Sinks.add(s) break V_c = np.zeros(len(V_), dtype=object) for s, (i, r) in V_.items(): V_c[i] = (s, r) E_ = [(V_[s1][0], V_[s2][0]) for s1, s2 in E if s1 in V_ and s2 in V_] E_size = [5 * E_probs[s1, s2] for s1, s2 in E if s1 in V_ and s2 in V_] # TODO pos_glob = find_pos_glob(V_, E, g_min, A, n) g = igraph.Graph(directed=True) g.add_vertices(len(V_)) g.add_edges(E_) visual_style = {} visual_style["layout"] = \ g.layout_fruchterman_reingold(maxiter=5000) visual_style["vertex_color"] = ['red' if t[0] in pos_glob and t[0] in V_lon else 'pink' if t[0] in pos_glob and t[0] not in V_lon else '#87CEFA' if t[0] not in V_lon else 'blue' for t in V_c] visual_style["vertex_frame_color"] = \ [visual_style["vertex_color"][i] if t[0] in Not_Sinks else 'black' for i, t in enumerate(V_c)] visual_style["vertex_frame_width"] = [2 for i in V_c] visual_style["vertex_size"] = [10 if t[0] in Not_Sinks else 20 for t in V_c] visual_style["edge_color"] = ['darkgrey' if e in set(E_lon.keys()) else 'lightgrey' for e in E_] visual_style["edge_width"] = E_size visual_style["bbox"] = (0, 0, 1800, 1000) igraph.summary(g) image = igraph.plot(g, **visual_style) image.save(out_file + '.png') print("image ", out_file) pass
"To isotopy class" + "\t" + "Diagram" + "\n") for e in graph.es: if start_part in graph.vs[ e.source]["iso_class"] and end_part in graph.vs[ e.target]["iso_class"]: file.write( str(graph.vs[e.source]["iso_class"]) + "\t" + str(graph.vs[e.source]["gc"]) + "\t" + str(graph.vs[e.target]["iso_class"]) + "\t" + str(graph.vs[e.target]["gc"]) + "\n") file.close() print("-------------------") print("Full graph summary.") igraph.summary(graph) connectivity_list = [] print("-------------------") # if not composites: to_delete_ids = [v.index for v in graph.vs if 'ICID' in v['iso_class']] graph.delete_vertices(to_delete_ids) print("Creating isotopy graph.") for e in graph.es: if 'ICID' not in graph.vs[ e.source]["iso_class"] or 'ICID' not in graph.vs[ e.target]["iso_class"]: connectivity_list.append((graph.vs[e.source]["iso_class"], graph.vs[e.target]["iso_class"])) else:
def sac1(graph): graph = graph.as_undirected() results = [] attributes = [attribute_map[x] for i, x in enumerate(attribute_map.keys())] weights = [1 for x in range(0, graph.ecount())] graph.es["weight"] = weights graph.vs["sim"] = attributes #graph.vs["community"] = [] for k in range(0, 15): membership = [(x) for x in range(0, graph.vcount())] membership_old = copy.copy(membership) clustering_old = igraph.VertexClustering(graph, membership) #igraph.plot(clustering_old) print(igraph.summary(clustering_old)) #A pass for k in range(0, 15): starting_membership = copy.copy(membership) for vert in range(0, len(membership)): mod_results = [] q_newman_cached = {} community_size = len(set(membership)) vert_old = igraph.VertexClustering(graph, membership=membership) mod_old = vert_old.modularity for vertj in range(0, len(membership)): community = membership[vertj] if community not in q_newman_cached: membership_copy = copy.copy(membership) membership_copy[vert] = community community_size_new = len(set(membership_copy)) comm_indices = [ i for i, x in enumerate(membership) if x == community ] comm_indices_new = [ i for i, x in enumerate(membership_copy) if x == community ] vert_new = igraph.VertexClustering( graph, membership=membership_copy) mod_new = vert_new.modularity modularity_diff = mod_new - mod_old #if modularity_diff > 0: #print "Modularity", modularity_new, "-", modularity_old, "=", modularity_diff #print "Mod ", mod_new, "-", mod_old, "=", modularity_diff sim_result_old = similarity(graph, comm_indices) sim_result_new = similarity(graph, comm_indices_new) #print sim_result_old, sim_result_new sim_result = (sim_result_new - sim_result_old) q_newman = alpha * modularity_diff + (1 - alpha) * ( sim_result) / (math.pow(community_size_new, 2)) q_newman_cached[community] = q_newman result = (community, q_newman) mod_results.append(result) filtered_results = filter(lambda mod: mod[1] > 0, mod_results) filtered_results = list(filtered_results) if len(filtered_results) > 0: sorted_results = sorted(filtered_results, key=itemgetter(1), reverse=True) membership[vert] = sorted_results[0][0] diff = reduce( lambda x, y: x + y, list( map(lambda x: 1 if x[0] != x[1] else 0, zip(starting_membership, membership))), 0) print("Membership diff of", diff) if starting_membership == membership: print("No further changes can be made") break if len(results) != 0 and results[len(results) - 1] == membership: print("No further improvements, finished on ", k) break previous_communities = None if "community" in set(graph.vertex_attributes()): previous_communities = { i: e for i, e in enumerate(graph.vs["community"]) } #print previous_communities results.append(copy.copy(membership)) optimal_membership = copy.copy(membership) #Rename optimal membership so it'll remove nodes, communities should be 0 to n. 
for k, x in enumerate(sorted(set(optimal_membership))): for l, y in enumerate(optimal_membership): if x == y: optimal_membership[l] = k print(optimal_membership) combinations = {"sim": lambda x: sum_attributes(x)} graph.contract_vertices(optimal_membership, combine_attrs=combinations) community_dict = defaultdict(list) for k, x in enumerate(optimal_membership): community_dict[x].append(k) if previous_communities is None: community_list = [set(community_dict[l]) for l in community_dict] else: community_list = [[ previous_communities[c] for c in community_dict[l] ] for l in community_dict] community_list = list( map(lambda x: [item for sublist in x for item in sublist], community_list)) print(community_list) graph.vs["community"] = community_list graph.simplify(combine_edges=dict(weight="sum"), multiple=True, loops=False) return graph.vs["community"]
valid_sequences += 1 loss = train_batch(model, X_couples, y_labels) losses += loss if epoch % print_every == 0: logging.info("Mean loss in Epoch [%s] with %s valid sequences = %s" % (epoch, valid_sequences, losses / valid_sequences)) losses, valid_sequences = 0.0, 0 if __name__ == "__main__": #g = Graph.Read_Edgelist("deepwalk/p2p-Gnutella08.edgelist") g = load_adjlist("deepwalk/karate.adjlist", directed=False) vocab_size = len(g.vs) max_len = 5 save = True sampling_table = make_sampling_table(vocab_size) degrees = np.array(g.vs.degree()) inv_sqrt_degree = 1/np.sqrt(degrees) sampling_table = inv_sqrt_degree/np.sum(inv_sqrt_degree) logging.info("Graph Summary: \n", summary(g)) logging.info("Building Model") if save: model = cPickle.load(open("out/Karate.Model.3100.pkl")) else: model = cPickle.load("out/Karate.Model.3100.pkl") model = Sequential() model.add(WordContextProduct(vocab_size, proj_dim=300, init='uniform')) model.compile(loss='binary_crossentropy', optimizer='rmsprop') #couples, labels = skipgrams(sequences[np.random.randint(vocab_size)], vocab_size, window_size=4, negative_samples=1.0, sampling_table=sampling_table) #train_on_model(model, g, vocab_size, print_every=1) #cPickle.dump(model, open("out/Karate.Model.3100.pkl", "wb"))
edges = list()
vertices = set()
for result in result1:
    vertices.add(str(result["leuven"]["name"]))
    vertices.add(str(result["brugge"]["name"]))
    vertices.add(str(result["bst"]["name"]))
    vertices.add(str(result["lst"]["name"]))
    vertices.add(str(result["ls"]["name"]))
    vertices.add(str(result["bs"]["name"]))
    edges.append([str(result["leuven"]["name"]), str(result["brugge"]["name"])])

g1.add_vertices(list(vertices))
g1.add_edges(edges)

ig.summary(g1)
g1.is_weighted()

# Get the attributes and the number of nodes
print(g1.vs.attribute_names())
print(g1.vcount())

# Get the attributes and the number of edges
print(g1.es.attribute_names())
print(g1.ecount())

# Calculate layout
layout_fr = g1.layout("fr")

# Define style for network plotting
visual_style = {}
# ==============================
# Author: Eugene Seo
# Date: 10.21.2016
# Description: CS519 Homework 1
# ==============================
import pandas

hs = pandas.read_csv("hsmetnet.txt", sep="\t", names=["v_left", "v_right"])

from igraph import *
from igraph import summary

meta_graph = Graph.TupleList(hs.values.tolist(), directed=True)
summary(meta_graph)

from collections import defaultdict

metabolite_set = set()
reaction_set = set()
metabolite_degree = defaultdict(int)
metabolite_idx = []

for v in meta_graph.vs:
    if 'REACTION' in v['name']:
        reaction_set.add(v)
        continue
    else:
        metabolite_degree[v['name']] = v.degree()
        metabolite_idx.append(v.index)
        metabolite_set.add(v)

print "A. number of distinct metabolites:", len(metabolite_set)
print "A. number of distinct reactions:", len(reaction_set)
print "A. number of edges:", hs.shape[0]
def read_edges(f_name):
    print(f_name)
    g = ig.Graph.Read_Ncol(f_name, names=True, directed=False)
    ig.summary(g)
    return g
def gen_random_paths(count):
    sum = 0
    for i in xrange(count):
        t = gen_random_path()
        sum += t
        print i, t
    print
    print 'avg', sum / count, 'seconds'


def gen_random_weighed_paths(count):
    sum = 0
    for i in xrange(count):
        t, skips = gen_random_weighted_path4()
        sum += t
        print i, t, skips
    print
    print 'avg', sum / count, 'seconds'


#gen_random_paths(1000)
ig.summary(G)
gen_random_weighed_paths(200)
ig.summary(G)
print 'is weighted', G.is_weighted()
        i = i - 1

    # east
    i = j + 1
    while i < len(rowlist) and rowlist[i][0] - rowlist[j][0] < detectrange:
        #checkcontact_minute(i,j,rowlist,cg,row)
        checkcontact(i, j, rowlist, cg, row, radius, exposurelimit)
        #checkcontact_merge(i,j,rowlist,cg,row,table)
        i = i + 1


print("Loading matrix.npy")
table = np.load("matrix.npy")

radius = 50
exposurelimit = 3

cg = igraph.Graph()
cg.add_vertices(table.shape[1])

print("Building Graph")
t1 = time.time()
for x in range(table.shape[0]):
    processrow(table, x, cg, radius, exposurelimit)
print("Time: ", time.time() - t1)

name = "contactgraph" + str(radius) + "." + str(exposurelimit) + ".zip"
cg.write_graphmlz(name)
#cg = cg.Read_GraphMLz("contactgraph10.5.zip")

igraph.summary(cg)
import igraph
import csv
import sys

if __name__ == '__main__':
    input_file = sys.argv[1]
    output_file = sys.argv[2]

    g = igraph.Graph.Read_Ncol(input_file, directed=True)
    g.summary()
    print igraph.summary(g)

    estimate = g.constraint()

    with open(output_file, 'wb') as fout:
        outcsv = csv.writer(fout)
        for v in g.vs:
            outcsv.writerow([v["name"], estimate[v.index]])
# In[1]:

import pandas
edge_list_ecoli = pandas.read_csv("ecolitfnet.txt", sep="\t", names=["source", "target"])
edge_list_ecoli.head(n=6)

# Step 2. Make an igraph directed graph from the network; print a graph summary as a sanity check.

# In[2]:

from igraph import Graph
from igraph import summary
ecoli_graph = Graph.TupleList(edge_list_ecoli.values.tolist(), directed=True)
summary(ecoli_graph)

# Q1. Which one of the connected 3-vertex motifs is most frequent in the E. coli regulatory network?

# In[3]:

import numpy as np
three_vertex_motifs_counts = ecoli_graph.motifs_randesu(size=3)
print np.nanargmax(three_vertex_motifs_counts)

# Q2. Which one of these motifs has a count of 47 in the regulatory network? (FFL)

# In[4]:

print three_vertex_motifs_counts.index(47)
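# Optional sketch (assumes the variables defined in the cells above): map the
# motif index reported by motifs_randesu() back to a concrete 3-vertex
# subgraph with Graph.Isoclass, which builds a representative of that
# isomorphism class.
most_frequent_idx = int(np.nanargmax(three_vertex_motifs_counts))
motif = Graph.Isoclass(3, most_frequent_idx, directed=True)
print(motif.get_edgelist())   # edge pattern of the most frequent connected 3-vertex motif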
if os.path.isfile('edgelist_%s.txt' % KEYWORD):
    os.remove('edgelist_%s.txt' % KEYWORD)
if os.path.isfile('nodes_%s.txt' % KEYWORD):
    os.remove('nodes_%s.txt' % KEYWORD)
os.system('make edgelist_%s.txt' % KEYWORD)
os.system('make nodes_%s.txt' % KEYWORD)

import igraph
g = igraph.Graph.Read_Ncol('edgelist_%s.txt' % KEYWORD, directed=False)

mapping = {}
with open('nodes_%s.txt' % KEYWORD) as f:
    for row in f.read().splitlines():
        mapping[row.split()[0]] = row.decode('utf-8', 'ignore').encode('ascii', 'ignore')

# Read_Edgelist
igraph.summary(g)
g.simplify()
igraph.summary(g)

# http://stackoverflow.com/questions/9471906/what-are-the-differences-between-community-detection-algorithms-in-igraph
if ALG == "infomap":
    comms = g.community_infomap()  # 12
elif ALG == "fastgreedy":
    comms = g.community_fastgreedy().as_clustering()  # CNM 4
elif ALG == "multilevel":
    comms = g.community_multilevel()  # louvain, 5
# comms = g.community_edge_betweenness(directed=False).as_clustering()  # TOO SLOW
# comms = g.community_label_propagation()  # too few communities?
# comms = g.community_leading_eigenvector()  # 5
# comms = g.community_optimal_modularity()  # too slow?
def generateGraph(self, netLogoWorld, fileNameOut, nodeSizeMultiplyer, coordMultiplyer, edgeWeightMultiplyer, edgeWeightIgnore, nodeNamePrefix): self.netLogoWorld = netLogoWorld; self.fileNameOut = fileNameOut; print("Generating graph started ..."); print("nodeSizeMultiplyer=%f, coordMultiplyer=%f, edgeWeightMultiplyer=%f, edgeWeightIgnore=%s, , nodeNamePrefix='%s'" \ % (nodeSizeMultiplyer, coordMultiplyer, edgeWeightMultiplyer, edgeWeightIgnore, nodeNamePrefix)) print "Igraph version %s" % (igraph.__version__); self.graph = igraph.Graph(); # populating graph nodes from turtles self.graph.add_vertices(len(self.netLogoWorld.turtles)); turtle = Turtle(); i = 0; for turtle in self.netLogoWorld.turtles: #print("Turtle: who=%d, label=%s" %(turtle.who, turtle.label)) # We cannot use id: # self.graph.vs[i]['id'] = turtle.who; # it was necesarry to add name to be able to refer to names of edges when we are adding edges later # that is only possible way, since vertex ids (turtles who) are not necessarily starting from 0, and igrah insist on 0 and non-sparce vertices ids self.graph.vs[i]['name'] = str(turtle.who); self.graph.vs[i]['size'] = turtle.size * nodeSizeMultiplyer; rgbColor = NetLogoWorld.colorNetlotoToRgb(turtle.color); self.graph.vs[i]['r'] = rgbColor[0]; self.graph.vs[i]['g'] = rgbColor[1]; self.graph.vs[i]['b'] = rgbColor[2]; self.graph.vs[i]['x'] = turtle.xcor * coordMultiplyer; self.graph.vs[i]['y'] = turtle.ycor * coordMultiplyer; if(turtle.label == None or turtle.label == ""): self.graph.vs[i]['label'] = "%s%d" % (nodeNamePrefix, turtle.who); else: self.graph.vs[i]['label'] = turtle.label; #self.graph.vs[i]['hophop'] = 'YESSS!!!'; # adding additional non-recognized columns #print "keys:%s " %(turtle.additionalParams.keys()); print(turtle.additionalParams.keys()) for columnName in turtle.additionalParams.keys(): columnType = turtle.columnTypes[columnName]; print("columnName=%s, columnType=%s, value=%s" %(columnName, columnType, turtle.additionalParams[columnName])); if(columnType == "string"): self.graph.vs[i][columnName] = str(turtle.additionalParams[columnName]); elif(columnType == "int"): self.graph.vs[i][columnName] = int(turtle.additionalParams[columnName]); elif(columnType == "float"): self.graph.vs[i][columnName] = float(turtle.additionalParams[columnName]); i =i+1; # populating edges nodes from links link = Link(); i = 0; for link in self.netLogoWorld.links: #print("link.end1 = %s, link.end2=%s" % (str(link.end1), str(link.end2))); #print self.graph; #print self.graph.get_edgelist(); # we cannot add by integers # self.graph.add_edges([(link.end1, link.end2)]); # because, that is recognized as igraph's vertex IDs, which do not need to match NetLogo turtle WHOs (if they do not start from 0) # There fore we need to refer by vertex names, and to do that we need to provide .add_edges() with strings instead of integers self.graph.add_edges([(str(link.end1), str(link.end2))]); self.graph.es[i]['Edge Id'] = link.end1 * 1000 + link.end2; if(link.label == None or link.label == ""): self.graph.es[i]['Edge Label'] = self.graph.es[i]['label'] = "%d-%d" % (link.end1, link.end2); else: self.graph.es[i]['Edge Label'] = self.graph.es[i]['label'] = turtle.label; if(not edgeWeightIgnore): self.graph.es[i]['weight'] = link.thickness*edgeWeightMultiplyer; print(link.additionalParams.keys()) #print link.additionalParams.keys(); for columnName in link.additionalParams.keys(): columnType = link.columnTypes[columnName]; print("columnName=%s, columnType=%s, value=%s" %(columnName, columnType, 
link.additionalParams[columnName])); if(columnType == "string"): self.graph.es[i][columnName] = str(link.additionalParams[columnName]); elif(columnType == "int"): self.graph.es[i][columnName] = int(link.additionalParams[columnName]); elif(columnType == "float"): self.graph.es[i][columnName] = float(link.additionalParams[columnName]); i =i+1; igraph.summary(self.graph); self.graph.write_graphml(fileNameOut); print("Generating graph finished ...");
def iGraph(self, debug = False, mode = "authorities", output = "ehri.graphml", direct = False):#Let's create the graph #We need its number of nodes first graphLength = len(set(self.index["items"])) if mode == "authorities": graphLength += len(self.index["authorities"]) #Just checking if debug == True: print "Graph vertices : " + str(graphLength) #Create its instance g = igraph.Graph(graphLength, directed = direct) #Now we needs names of stuff, lets call it labels labels = [item for item in self.index["items"]] #We do miss labels of authorities, dont we ? if mode == "authorities": labels += self.index["authorities"] #Have we got same number than graphLength ? if debug==True: print "Labels length " + str(len(labels)) #Just to be sure : if len(labels) != graphLength: print "Not the same number of names and labels you fool" print "So you shall not pass" sys.exit() #We create another thing : we save index of items and labels in a dictionary, because that's why index = {} for name in labels: index[name] = len(index) #Isn't it beautiful ? #So now, we can add labels to our graph g.vs["label"] = labels #Would be nice to connect it... #Hello EDGES edges = [] weight = [] for i in self.index["items"]: for a in self.index["items"][i]: if mode == "cluster": edges.append((index[i], index[a[0]])) weight.append(a[1]) else: edges.append((index[i], index[a])) g.add_edges(edges) if mode == "cluster": g.es["weight"] = weight #A little sum-up ? if debug == True: igraph.summary(g) try: if mode != "cluster": #Let's try to make some community out of it... d = g.community_fastgreedy() cl = d.as_clustering() #Let's save this clusterization into an attribute g.vs["fastgreedy"] = cl.membership #Sping glass not possible except: print "Fast greedy not working. Multi edges graph ?" #And do that with other clusterization modules d = g.community_walktrap() cl = d.as_clustering() #Let's save this clusterization into an attribute g.vs["walktrap"] = cl.membership g.save(output)
def BuildGraph(start_date, end_date, site_info = None, tags = [], start_page = 1, graph = None): """ Builds a graph from data from StackExchange with the specified site and tags, between the start and end date. The graphs's vertices are tags and edges are placed when two tags appear on the same question. Can also add to an existing graph. This and the start_page argument are useful if quota limits prevented grabbing all the data """ if not graph: # Create a new graph graph = igraph.Graph(directed = False) graph.es['weight'] = 1.0 # make the graph weighted if not site_info: site_info = {'site': 'stackoverflow', 'base_url': 'http://api.stackexchange.com', 'sleep_time': 0.035} # s, since limited to 30 requests/s} # Keep track of how we've grab from StackExchange # and if we've grabbed everything quota_remaining = 10000 has_more = True page = start_page count = 0 while quota_remaining > 0 and has_more: # Grab a bunch of questions url = GetQuestionRequestString(page, start_date, end_date, tags, site_info) r = requests.get(url) data = r.json() # handle throttling # the API can tell us to back off, otherwise use a default time sleep_time = float(data.get('backoff', site_info['sleep_time'])) time.sleep(site_info['sleep_time']) # for throttling # process the questions and add to graph questions = data.get('items', []) # empty list if no questions returned for question in questions: AddQuestionToGraph(question, graph) count += 1 # update for next iteration if quota_remaining > 0: page += 1 has_more = data['has_more'] quota_remaining = data['quota_remaining'] print "Processed %i pages and %i questions."%(page, count) if quota_remaining == 0: print " but ran out of quota." else: print " (%i quota left)"%(quota_remaining) print "Graph summary:" igraph.summary(graph) return graph
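# A minimal usage sketch for BuildGraph (not part of the original module): the
# tag list is hypothetical and the dates are example epoch timestamps, on the
# assumption that GetQuestionRequestString (defined elsewhere in this module)
# expects Unix epoch values, as the StackExchange API does.
import time
import igraph

end_date = int(time.time())
start_date = end_date - 7 * 24 * 3600      # roughly one week of questions

tag_graph = BuildGraph(start_date, end_date, tags=['python', 'igraph'])
igraph.summary(tag_graph)
tag_graph.write_graphml('tag_cooccurrence.graphml')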