def km_random(g, k=5, m=3, start=None):
    """Process k nodes of the breadth-first sequence at a time; add and delete m edges per batch."""
    if start is None:
        start = g.nodes().pop()
    bfList = list(nx.bfs_edges(g, start))
    bfList.reverse()
    bfList.append((start, start))
    tempk = []
    try:
        while bfList:
            for each in range(k):
                tempk.append(bfList.pop()[1])
            tg = nx.subgraph(g, tempk)
            e = del_edge(tg, m)
            g.remove_edges_from(e)
            tg = nx.subgraph(g, tempk)
            e = add_edge(tg, m)
            g.add_edges_from(e)
            tempk = []
    except IndexError:
        print("pop finishing")
def calc_euler_tour(g, start, end): '''Calculates an Euler tour over the graph g from vertex start to vertex end. Assumes start and end are odd-degree vertices and that there are no other odd-degree vertices.''' even_g = nx.subgraph(g, g.nodes()) if end in even_g.neighbors(start): # If start and end are neighbors, remove the edge even_g.remove_edge(start, end) comps = list(nx.connected_components(even_g)) # If the graph did not split, just find the euler circuit if len(comps) == 1: trail = list(nx.eulerian_circuit(even_g, start)) trail.append((start, end)) elif len(comps) == 2: subg1 = nx.subgraph(even_g, comps[0]) subg2 = nx.subgraph(even_g, comps[1]) start_subg, end_subg = (subg1, subg2) if start in subg1.nodes() else (subg2, subg1) trail = list(nx.eulerian_circuit(start_subg, start)) + [(start, end)] + list(nx.eulerian_circuit(end_subg, end)) else: raise Exception('Unknown edge case with connected components of size {0}:\n{1}'.format(len(comps), comps)) else: # If they are not neighbors, we add an imaginary edge and calculate the euler circuit even_g.add_edge(start, end) circ = list(nx.eulerian_circuit(even_g, start)) try: trail_start = circ.index((start, end)) except: trail_start = circ.index((end, start)) trail = circ[trail_start+1:] + circ[:trail_start] return trail
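# Illustrative sketch (not part of the original code): calc_euler_tour above is built
# around nx.eulerian_circuit, which yields the edges of an Euler circuit when every
# vertex has even degree. A minimal, self-contained demonstration of that primitive,
# using a toy graph chosen only for this example:
import networkx as nx

demo = nx.cycle_graph(4)                       # 0-1-2-3-0, all degrees even
circuit = list(nx.eulerian_circuit(demo, source=0))
print(circuit)                                 # e.g. [(0, 1), (1, 2), (2, 3), (3, 0)]
assert len(circuit) == demo.number_of_edges()  # every edge is used exactly once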
def match_story_by_sen_edge(Gs, stories, target, tau): existing = copy.deepcopy(stories['keywords_set']) match_target = copy.deepcopy(target['keywords_set']) node_cos = match_story(existing, match_target, 0.3) subgs1 = [] for sto in match_target: subgs1.append(nx.subgraph(Gs, sto)) subgs0 = [] for sto in existing: subgs0.append(nx.subgraph(Gs, sto)) matched = [] for i in range(len(subgs1)): matchingGraph = subgs1[i] dis = [] for cand in subgs0: val = compute_distance(matchingGraph, cand) dis.append(val) total = np.multiply(dis, node_cos[i]) match_score = np.max(total) if match_score < tau: stories['keywords_set'].append(target['keywords_set'][i]) stories['doc_set'].append(target['doc_set'][i]) continue print match_score match_ind = np.argmax(total) print match_ind, stories['doc_set'][match_ind], target['doc_set'][i] # match_text = existing[match_ind] stories['keywords_set'][match_ind].extend(match_target[i]) u = stories['doc_set'][match_ind].union(target['doc_set'][i]) stories['doc_set'][match_ind] = u
def test_subgraph_of_subgraph(self):
    SGv = nx.subgraph(self.G, range(3, 7))
    SDGv = nx.subgraph(self.DG, range(3, 7))
    SMGv = nx.subgraph(self.MG, range(3, 7))
    SMDGv = nx.subgraph(self.MDG, range(3, 7))
    for G in self.graphs + [SGv, SDGv, SMGv, SMDGv]:
        SG = nx.induced_subgraph(G, [4, 5, 6])
        assert_equal(list(SG), [4, 5, 6])
        SSG = SG.subgraph([6, 7])
        assert_equal(list(SSG), [6])
        # subgraph-subgraph chain is short-cut in base class method
        assert_is(SSG._graph, G)
def fitness(new_members, is_print=False):
    if len(new_members) == 1:
        return 0
    else:
        new_nodes = set(flatten(map(lambda mem: nx.neighbors(data_graph, mem), new_members))) | new_members
        global w_in
        global w_all
        w_all = len(nx.subgraph(data_graph, new_nodes).edges())
        w_in = len(nx.subgraph(data_graph, new_members).edges())
        if is_print:
            print 'w_in', w_in, nx.subgraph(data_graph, new_members).edges()
            print 'w_all', w_all, nx.subgraph(data_graph, new_nodes).edges()
        return float(w_in) / w_all
def test_subgraph(self):
    assert_equal(self.G.subgraph([0, 1, 2, 4]).adj,
                 nx.subgraph(self.G, [0, 1, 2, 4]).adj)
    assert_equal(self.DG.subgraph([0, 1, 2, 4]).adj,
                 nx.subgraph(self.DG, [0, 1, 2, 4]).adj)
    assert_equal(self.G.subgraph([0, 1, 2, 4]).adj,
                 nx.induced_subgraph(self.G, [0, 1, 2, 4]).adj)
    assert_equal(self.DG.subgraph([0, 1, 2, 4]).adj,
                 nx.induced_subgraph(self.DG, [0, 1, 2, 4]).adj)
    # subgraph-subgraph chain is allowed in function interface
    H = nx.induced_subgraph(self.G.subgraph([0, 1, 2, 4]), [0, 1, 4])
    assert_is_not(H._graph, self.G)
    assert_equal(H.adj, self.G.subgraph([0, 1, 4]).adj)
def filterGraphByRecipeID(G, Grecipes, Gingredients, recipeNodes): recipe_to_remove = [ n for n in Grecipes.nodes() if n not in recipeNodes] searchGrecipes = nx.subgraph(Grecipes, recipeNodes) searchGrecipes.remove_nodes_from(recipe_to_remove) ingrNodes = list(set([b for n in searchGrecipes.nodes() for b in G.neighbors(n)])) ingr_to_remove = [ n for n in Gingredients.nodes() if n not in ingrNodes] searchGingredients = Gingredients searchG = nx.subgraph(G, recipeNodes + ingrNodes) searchG.remove_nodes_from(recipe_to_remove) searchG.remove_nodes_from(ingr_to_remove) return (searchG, searchGrecipes, searchGingredients)
def plot_induced_subgraphs(self): plt.figure(1) partition = self.find_partition()[1] communities = [partition[v] for v in partition] newGraph=self.G for community in communities: nx.subgraph(newGraph, [key for key in partition if partition[key]==community]) node_color=[float(partition[v]) for v in partition] labels = {} for node in newGraph.nodes(): labels[node]= newGraph.node[node].get('name', '') nx.draw_spring(newGraph,node_color=node_color, labels=labels) plt.show() plt.savefig("C:\\Users\\Heschoon\\Dropbox\\ULB\\Current trends of artificial intelligence\\Trends_project\\graphs\\graph_induced.pdf")
def core_substitution(graph, orig_cip_graph, new_cip_graph): """ graph is the whole graph.. subgraph is the interfaceregrion in that we will transplant new_cip_graph which is the interface and the new core """ assert( set(orig_cip_graph.nodes()) - set(graph.nodes()) == set([]) ), 'orig_cip_graph not in graph' # select only the interfaces of the cips new_graph_interface_nodes = [n for n, d in new_cip_graph.nodes(data=True) if 'core' not in d] new_cip_interface_graph = nx.subgraph(new_cip_graph, new_graph_interface_nodes) original_graph_interface_nodes = [n for n, d in orig_cip_graph.nodes(data=True) if 'core' not in d] original_interface_graph = nx.subgraph(orig_cip_graph, original_graph_interface_nodes) # get isomorphism between interfaces, if none is found we return an empty graph iso = get_good_isomorphism(graph, orig_cip_graph, new_cip_graph, original_interface_graph, new_cip_interface_graph) if len(iso) != len(original_interface_graph): # print iso # draw.display(orig_cip_graph) # draw.display(new_cip_graph) #draw.graphlearn([orig_cip_graph, new_cip_graph],size=10) logger.log(5,"grammar hash collision, discovered in 'core_substution' ") return nx.Graph() # ok we got an isomorphism so lets do the merging graph = nx.union(graph, new_cip_graph, rename=('', '-')) # removing old core # original_graph_core_nodes = [n for n, d in orig_cip_graph.nodes(data=True) if 'core' in d] original_graph_core_nodes = [n for n, d in orig_cip_graph.nodes(data=True) if 'core' in d] for n in original_graph_core_nodes: graph.remove_node(str(n)) # merge interfaces for k, v in iso.iteritems(): graph.node[str(k)][ 'interface'] = True # i am marking the interface only for the backflow probability calculation in graphlearn, this is probably deleteable because we also do this in merge, also this line is superlong Ooo merge(graph, str(k), '-' + str(v)) # unionizing killed my labels so we need to relabel return nx.convert_node_labels_to_integers(graph)
def node_coordinates_robust(graph): xs, ys, zs, As, namess = [], [], [], [], [] for cc in nx.connected_components(graph.to_undirected()): if len(cc) == 1: x, y, z = [0], [0], [0] A = np.array([[0]]) names = list(cc) elif len(cc) == 2: x, y, z = [0, 1], [0, 1], [0, 1] n1, n2 = list(cc) A = np.array([[0, 1], [1, 0]]) * graph[n1][n2].get('weight', 1) names = list(cc) else: x, y, z, A, names = node_coordinates(nx.subgraph(graph, cc)) xs.append(x) ys.append(y) zs.append(z) As.append(A) namess.append(names) for coord in [xs, ys, zs]: loc = 0 for i, arr in enumerate(coord): arr = np.asanyarray(arr) scale = np.sqrt(arr.size - 0.99) coord[i] = ((arr - np.min(arr)) / np.max(arr) * scale + loc) loc += 1.05 * scale x = np.concatenate(xs) y = np.concatenate(ys) z = np.concatenate(zs) A = coo_mat_concat(As) names = list(itertools.chain(*namess)) return x, y, z, A, names
def congressSubgraph(base_graph, title='Rep'):
    """Generate a subgraph from a Congressional Twitter graph based on a member's title"""
    header = ['lastname', 'firstname', 'middlename', 'title', 'party', 'state', 'gender', 'twitter_id']
    congress_reader = csv.DictReader(open('../twitter_congress.csv', 'rU'), fieldnames=header)
    all_congress = list(congress_reader)
    mem_list = [a['twitter_id'] for a in all_congress if a['title'] == title]
    return nx.subgraph(base_graph, mem_list)
def test_msf_components(): grid, dist_matrix = random_settlements(500) msf = mod_boruvka(grid) msf_subgraph = lambda components: nx.subgraph(msf, components) component_graphs = map(msf_subgraph, nx.connected_components(msf)) def full_graph(g): new_graph = nx.Graph() new_graph.add_nodes_from(g.nodes(data=True)) if len(g.nodes()) < 2: return new_graph new_graph.add_weighted_edges_from([(u, v, dist_matrix[u][v]) for u, v in itertools.product(g.nodes(), g.nodes()) if u != v]) return new_graph full_graphs = map(full_graph, component_graphs) mst_graphs = map(nx.mst.minimum_spanning_tree, full_graphs) diff_component_mst = [] for i in range(len(component_graphs)): c_sets = set([frozenset(e) for e in component_graphs[i].edges()]) mst_sets = set([frozenset(e) for e in mst_graphs[i].edges()]) if not c_sets == mst_sets: diff_component_mst.append(i) assert len(diff_component_mst) == 0, str(len(diff_component_mst)) + \ " components are not MSTs"
def step3_balance_number(G,node): right=0 wrong=0 total=0 cate_num=dict() #the number 16 cases ins=find_in_nodes(G,node) outs=find_out_nodes(G,node) sub_nodes=ins+outs subG=nx.subgraph(G,sub_nodes) for in_node in ins: c_outs=find_in_nodes(subG,in_node) for out_node in c_outs: if out_node in outs: total=total+1 #node-->out_node-->mid_node-->in_node-->node flag1=(G[node][out_node]['weight']==-1) flag2=(G[out_node][in_node]['weight']==-1) flag3=(G[in_node][node]['weight']==-1) key=str(1000+flag1*100+flag2*10+flag3) if cate_num.has_key(key): cate_num[key]=cate_num[key]+1 else: cate_num[key]=1 if((flag1+flag2+flag3)%2==0): right=right+1 else: wrong=wrong+1 return (right,total,cate_num)
def init():
    global projectname
    global version_array
    global pos
    global x
    global y
    global size_array
    global numframes
    global sg
    for i in range(6):
        data_directory = projectname + "_history/" + projectname + version_array[i] + "/" + projectname
        [g, lines] = creategraph.readfile(data_directory)
        if i == 0:
            sg = creategraph.refine(g, 45)
            [pos, x, y] = creategraph.coordinate(sg)
            size = creategraph.point_sizes(sg, lines)
            zeros = np.array([0] * len(size))
            print 'len(size) = ', len(size)
            print 'zeros = ', zeros
            size_array.append(zeros)
            size_array.append(size)
        else:
            # create the graph induced by nodes from sg
            subg = nx.subgraph(g, nx.nodes(sg))
            print subg, sg
            if nx.number_of_nodes(subg) != nx.number_of_nodes(sg):
                print 'panic at 34'
            else:
                # this looks like an error, but it is not
                size = creategraph.point_sizes(sg, lines)
                size_array.append(size)
    x = np.array(x)
    y = np.array(y)
    size_array = np.array(size_array)
def subgraph_from_pathways(pathways, g): """ returns a topological annotated graph containing given pathways """ nodes = [] if type(pathways) == list: for p in pathways: nodes += nodes_from_pathway(p, g) elif type(pathways) == str: nodes += nodes_from_pathway(pathways, g) h = nx.subgraph(g, nodes) remove_edges = [] if type(pathways) == list: for p in pathways: for e in h.edges(): if not p in h.get_edge_data(*e)['pathways']: remove_edges.append(e) elif type(pathways) == str: for e in h.edges(): if not pathways in h.get_edge_data(*e)['pathways']: remove_edges.append(e) for r in remove_edges: h.remove_edge(*r) return topological_annotate(h)
def greedy_choice(G, candidate, blue_nodes, black_nodes, visited):
    """Helper function to greedy cut"""
    G.node[candidate][gc.PARTITION] = gc.BLUE
    blue_cut_val = gc.cut_edges(nx.subgraph(G, visited))
    G.node[candidate][gc.PARTITION] = gc.BLACK
    black_cut_val = gc.cut_edges(nx.subgraph(G, visited))
    if blue_cut_val > black_cut_val:
        G.node[candidate][gc.PARTITION] = gc.BLUE
        blue_nodes.add(candidate)
    else:
        black_nodes.add(candidate)
    return blue_nodes, black_nodes
def remove_bridges(self, in_file_, start_id, delim_): reader = csv.reader(open(in_file_), delimiter=delim_) for line in reader: self.G.remove_edge(int(line[0]) - start_id,int(line[1]) - start_id) print "no of components after removing bridges: %d" % nx.number_connected_components(self.G) comps = nx.connected_components(self.G) for comp in comps: print len(comp) bfs = self.BreadthFirstLevels(1,100) nbunch = [1] for n in bfs: #print(n) val_ = n.values() for set_ in val_: nbunch += list(set_) #print nbunch print "start creating the induced graph!" induced_g = nx.subgraph(self.G, nbunch) self.G.clear() # start_ = 0 # for n_ in induced_g: # self.maps_[n_] = start_ # start_ += 1 # for n_1 in induced_g: # for n_2 in induced_g: # if n_1 in induced_g.neighbors(n_2): # self.G.add_edge(maps_[n_1],maps_[n_2]) self.n = nx.number_of_nodes(induced_g) self.G = induced_g print "no of node: %d and no of edges: %d in induce graph!" % (self.G.number_of_nodes(), self.G.number_of_edges())
def compute_global_utility(graph):
    """
    Return an index that quantifies how big the size of adopter clusters is
    in the entire population of consumers.

    We call this index 'Global utility' in our article.

    This index computes the cluster-size-weighted average of adopter clusters
    divided by the total number of consumers, so it goes from 0 to 1 and it's
    always increasing.
    """
    N = len(graph.nodes())
    adopters = get_adopters(graph)

    clusters = nx.subgraph(graph, adopters)
    cluster_sizes = [len(c) for c in nx.connected_components(clusters) if len(c) > 1]

    if cluster_sizes:
        # The weight of each cluster depends on its size
        weights = np.array(cluster_sizes) / N
        # Compute the weighted average
        weighted_average = np.average(cluster_sizes, weights=weights)
        # Since the index needs to go between 0 and 1, we need to divide by N again
        utility = weighted_average / N
        return utility
    else:
        return 0
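# Illustrative sketch (not part of the original module): the cluster-size-weighted
# average used in compute_global_utility, worked on toy numbers so the formula is
# easy to check. The cluster sizes and N below are made up for this example.
import numpy as np

N = 10                           # total number of consumers
cluster_sizes = [3, 2]           # sizes of adopter clusters with more than one node
weights = np.array(cluster_sizes) / N
# np.average computes sum(w_i * s_i) / sum(w_i) = (0.3*3 + 0.2*2) / 0.5 = 2.6
weighted_average = np.average(cluster_sizes, weights=weights)
utility = weighted_average / N   # 0.26, always between 0 and 1
print(utility)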
def expand(seed_set): members = seed_set print 'seed:', members, nx.subgraph(data_graph, set( flatten(map(lambda mem: nx.neighbors(data_graph, mem), members))) | members).edges() is_change = True while is_change: to_check_neighbors = list(flatten(map(lambda mem: nx.neighbors(data_graph, mem), members))) random.shuffle(to_check_neighbors) print to_check_neighbors is_change = False # for neighbor in to_check_neighbors: for neighbor in to_check_neighbors: if fitness(members | {neighbor}) > fitness(members): is_change = True members.add(neighbor) fitness(members, is_print=True) print 'add neighbor:', neighbor, members, 'w_in:', w_in, 'w_all:', w_all break for member in members: if fitness(members - {member}) > fitness(members): is_change = True members.remove(member) fitness(members, is_print=True) print 'remove member:', member, members, 'w_in', w_in, 'w_all:', w_all break print set(members) print '\n----------------------------\n'
def main(): """ Pre-processing: load data, compute centrality measures, write files with node data """ print(nx.__version__) # Load network data, create storage dict, and extract main component depends=nx.read_edgelist("data/depends.csv",delimiter=",",create_using=nx.DiGraph(),nodetype=str,data=(("weight",time_from_today),)) depends.name="depends" suggests=nx.read_edgelist("data/suggests.csv",delimiter=",",create_using=nx.DiGraph(),nodetype=str,data=(("weight",time_from_today),)) suggests.name="suggests" imports=nx.read_edgelist("data/imports.csv",delimiter=",",create_using=nx.DiGraph(),nodetype=str,data=(("weight",time_from_today),)) imports.name="imports" nets_dict={"depends":depends,"suggests":suggests,"imports":imports} for k in nets_dict.keys(): main_component=nx.connected_component_subgraphs(nets_dict[k].to_undirected())[0].nodes() nets_dict[k]=nx.subgraph(nets_dict[k],main_component) # Run multiple measures on graphs and normalize weights measure_list=[nx.in_degree_centrality,nx.betweenness_centrality,nx.pagerank] for g in nets_dict.values(): multiple_measures(g,measure_list) normalize_weights(g) # Output networks in GraphML format (to store node attributes) for i in nets_dict.items(): # print(i[1].edges(data=True)) nx.write_graphml(i[1],"data/"+i[0]+"_data.graphml") print("") print("All files written with data") """Visualization:
def weak_link_distribution(bn, N_clus=2, mcs=0, n_jumps=1):
    jumps = [j[0] for j in networkb.find_th_jumps(bn, N_clus)]
    jumps = sorted(list(set(jumps)), reverse=True)
    if len(jumps) == 0:
        return []
    pcs = jumps[0:min(n_jumps, len(jumps) - 1)]
    d = []
    for pc in pcs:
        G = bn.get_Graph(pc, correlation='positive')
        # keep only components larger than the minimum cluster size mcs
        cluster_list = [x for x in networkx.connected_components(G) if len(x) > mcs]
        if len(cluster_list) < 2:
            continue
        thmin = (jumps[jumps.index(pc) + 1] + pc) / 2
        print thmin
        print pc
        H = bn.get_Graph(thmin, th_up=pc, correlation='positive')
        if H.number_of_edges() < 1:
            continue
        H = networkx.subgraph(H, itertools.chain.from_iterable(cluster_list))
        if H.number_of_edges() < 1:
            continue
        for e in H.edges_iter():
            d.append(bn.nodedistance(e))
    json.dump(d, open(bn.weak_link_distribution_file, 'w'))
    return d
def filter_relevant_ids(graph): """ Dado el grafo de mis followed y sus followed, extraemos los 100 nodos más relevantes """ graph = nx.read_gpickle('graph.gpickle') my_followed = list(set([x[0] for x in graph.edges()])) graph = nx.subgraph(graph, my_followed) def get_nfollowed(nid): return len(graph.successors(nid)) def get_nfollowers(nid): return len(graph.predecessors(nid)) import pandas as pd df = pd.DataFrame() df['nodeid'] = my_followed df['nfollowed'] = df['nodeid'].apply(get_nfollowed) df['nfollowers'] = df['nodeid'].apply(get_nfollowers) relevant = df[(df.nfollowed > 40) & (df.nfollowers > 40)] relevantids = list(relevant.nodeid.values) with open('layer0.pickle','wb') as f: pickle.dump(relevantids, f) return relevantids
def upper_bound(G, k, H):
    '''
    Upper bound on the size of a k-plex of H in G.
    '''
    # upper_bound_by_deg = min([nx.degree(G, node) for node in H]) + k
    # upper_bound_by_deg = 100
    subg = nx.subgraph(G, H)
    validate_neighbors = {node for node in neighbors(G, H)
                          if len(set(G.neighbors(node)).intersection(H)) >= len(H) + 1 - k}
    strict_nodes = [node for node in H if len(subg[node]) == len(H) - k]
    if len(strict_nodes) > 0:
        available_nodes = {node for node in G.neighbors(strict_nodes[0]) if node not in H}
        for i in range(1, len(strict_nodes)):
            available_nodes.intersection_update(
                {node for node in G.neighbors(strict_nodes[i]) if node not in H})
        available_nodes.intersection_update(validate_neighbors)
        return len(H) + len(available_nodes)
    else:
        min_d = float('inf')
        for node in H:
            nbrs = neighbors(G, [node])
            nbrs.intersection_update(validate_neighbors)
            num_non_nbrs = k - 1 - (subg.number_of_nodes() - nx.degree(subg, node))
            min_d = min(min_d, len(nbrs) + num_non_nbrs)
        return min_d
def find_football_communities(): """ Finds the communities produced for the football network, uses compare methods to graph """ fgraph = CD.football_graph() known = CD.football_known_c() temp7 = known[7] temp8 = known[8] temp9 = known[9] known[7] = temp8 known[8] = temp9 known[9] = temp7 center_g = nx.Graph() center_g.add_nodes_from(range(12)) centers = nx.circular_layout(center_g, scale = 10) pos = {} subgraphs = [nx.subgraph(fgraph, c) for c in known] count = -1 for g in subgraphs: count += 1 (off_x, off_y) = centers[count] pos_local = nx.circular_layout(g, scale=2.) for n, place in pos_local.iteritems(): pos[n] = place + np.array([off_x, off_y]) compare_methods(fgraph, 'football_', param=[1., 1., 5./115., 4, 0, .7, 20], known=known, pos=pos, color_map={76:1, 11:2, 7:3, 102:4, 104:5, 47:6, 98:7, 96:8, 23:9, 94:10, 27:0}, data_path="FootballGames/football_metis")
def bound_branch(G, k ,q_nodes, is_use_cores=False, select_method='rand'): ''' wrapper of branch and bound method ''' ts = time.time() global optimal optimal = set() k_neighbors = k_hop_nbrs_n(G, k, q_nodes) sub = set(q_nodes) sub.update(k_neighbors) g = nx.subgraph(G, sub) if is_use_cores: cores = nx.core_number(g) else: cores = None # print('subgraph ', g.nodes()) print('minimum degree of subgraph', minimum_degree(g)) print('k neighbors', len(k_neighbors)) BB(g, k, q_nodes, set(), cores, select_method) print('the solution is', optimal) te = time.time() texe = round(te-ts, 2) # the execution time return texe
def mean_geodesic(pg, debug=0): """ mean_geodesic() calculates the mean geodesic (shortest) distance between two vertices in a network. """ length_sum = 0 if networkx.is_directed_acyclic_graph(pg): n_pairs_with_paths = 0 else: n_pairs_with_paths = ( pg.order() * ( pg.order() + 1 ) ) / 2 tg = networkx.subgraph(pg, pg.nodes()) for u in pg.nodes_iter(): tg.delete_node(u) for v in tg.nodes_iter(): try: length = networkx.shortest_path_length(pg,u,v) if length > 0: length_sum = length_sum + length if networkx.is_directed_acyclic_graph(pg): n_pairs_with_paths = n_pairs_with_paths + 1 except networkx.exception.NetworkXError: pass try: geodesic = float(length_sum) / float(n_pairs_with_paths) except: geodesic = -999. if debug: print 'length_sum:\t', length_sum print 'n_pairs_with_paths:\t', n_pairs_with_paths return geodesic
def vis_coauthor_communities(graph, source, i, prefix, options, radius, overlap): """ Finds the communities produced by different methods for the astro citation network """ interest = CD.get_ball(graph, options[source][i], radius) print "Displaying and computing for a subset of ", len(interest), " nodes." sgraph = nx.subgraph(graph, interest) cleaned = {} for key in options.keys(): """ for generating sub community structure """ if key == source: # split the overarching with the substructure cleaned[source] = [options[source][i]] options['Parallel Subcommunities'] = options[source][:i] options['Parallel Subcommunities'].extend(options[source][i+1:]) key = 'Parallel Subcommunities' filtered = [filter(lambda n: n in interest, c) for c in options[key]] filtered = filter(lambda c: len(c) > 0, filtered) cleaned[key] = filtered cleaned[key] = CD.clean_of_duplicate_c(cleaned[key], overlap=overlap) compare_methods(sgraph, prefix, options=cleaned)
def dyad_census(pg, debug=0, debuglog=0): """ dyad_census() calculates the number of null, asymmetric, and mutual edges between all pairs of nodes in a directed graph. """ if not networkx.is_directed_acyclic_graph(pg): logging.error('pyp_network.dyad_census() requires a directed graph as input!') return 0 else: census = {} census['null'] = 0 census['asymmetric'] = 0 census['mutual'] = 0 tg = networkx.subgraph(pg, pg.nodes()) for u in pg.nodes_iter(): tg.delete_node(u) for v in tg.nodes_iter(): if not pg.has_neighbor(u,v): census['null'] = census['null'] + 1 elif u in pg.predecessors(v) and v in pg.successors(u): census['mutual'] = census['mutual'] + 1 if debug: print 'Nodes %s and %s link to one another!' % ( u, v ) if debuglog: logging.error('Nodes %s and %s link to one another!',u, v) elif u in pg.predecessors(v) and v not in pg.successors(u): census['asymmetric'] = census['asymmetric'] + 1 elif u not in pg.predecessors(v) and v in pg.successors(u): census['asymmetric'] = census['asymmetric'] + 1 else: pass del(tg) return census
def network_properties(network : nx.DiGraph, in_degree_threshold : float = -1, pagerank_threshold : float = -1, damping : float = 0.85, spectral_offset : float = 0.5)\ -> (pd.DataFrame, sparse.spmatrix): conn = max(nx.connected_components(network.to_undirected()), key=len) conn = nx.subgraph(network, conn) pr = compute_pagerank(conn, damping=damping) names = nx.nodes(conn) indeg = [conn.in_degree(n) for n in names] odeg = [conn.out_degree(n) for n in names] description = [conn.node[n].get('description', n) for n in names] x, y, z, Adj, aff_names = node_coordinates(conn, nodelist=names, offset=spectral_offset) data = {'id': names, 'in_degree': indeg, 'out_degree': odeg, 'pagerank': pr, 'affinity_x': x, 'affinity_y': y, 'processing_depth': z, 'description': description} df = pd.DataFrame(data, index=names) df = df[df['pagerank'] > pagerank_threshold / len(names)] df = df[df['in_degree'] > in_degree_threshold] return df, Adj
def get_all_hardware_grid_problems( device_graph: nx.Graph, central_qubit: cirq.GridQubit, n_instances: int, rs: np.random.RandomState, ): """Helper function to get all subgraphs for a given named device. This is annotated with lru_cache so you can freely call this function multiple times without re-constructing the list of qubits. Used by `generate_hardware_problem_problem` to get a subgraph for a given value of n_qubits. Returns: A dictionary indexed by n_qubit, instance_i """ all_hg_problems: Dict[Tuple[int, int], HardwareGridProblem] = {} subgraphs = get_growing_subgraphs(device_graph=device_graph, central_qubit=central_qubit) for n_qubits in sorted(subgraphs): subgraph = nx.subgraph(device_graph, subgraphs[n_qubits]) for instance_i in range(n_instances): problem = random_plus_minus_1_weights(subgraph, rs=rs) qubits = sorted(problem.nodes) coordinates = [(q.row, q.col) for q in qubits] problem = nx.relabel_nodes(problem, {q: i for i, q in enumerate(qubits)}) all_hg_problems[n_qubits, instance_i] = HardwareGridProblem( graph=problem, coordinates=coordinates, ) return all_hg_problems
def _build_list(self): print "using disconnectivity analysis to find minima to untrap" self.minpairs = deque() graph = TSGraph(self.database).graph cclist = list(nx.connected_components(graph)) # get the largest cluster group1 = cclist[0] min1 = sorted(group1, key=lambda m: m.energy)[0] if not min1 == self.database.minima()[0]: # make sure that the global minimum is in group1 print "warning, the global minimum is not the in the largest cluster." # compute the energy barriers for all minima in the cluster subgraph = nx.subgraph(graph, group1) energy_barriers = self._compute_barriers(subgraph, min1) # sort the minima by the barrier height divided by the energy difference weights = [(m, np.abs(barrier) / np.abs(m.energy - min1.energy)) for (m, barrier) in energy_barriers.iteritems()] weights.sort(key=lambda v: 1. / v[1]) self.minpairs = deque() for min2, w in weights: if len(self.minpairs) > self.list_len: break if not self.is_good_pair(min1, min2): continue self.minpairs.append((min1, min2)) if True: # print some stuff print " untrap analysis: minimum", min2.id(), "with energy", min2.energy, "barrier", energy_barriers[ min2], "untrap weight", w
def __HeadTailCommunityDetection(G, finaledgelist, head_tail_ratio=0.6): H = nx.connected_components(G) for s in H: subgraph = nx.subgraph(G, s) result = nx.edge_betweenness(subgraph, normalized=False) edges = list(result.keys()) values = list(result.values()) mean = np.mean(values) edgelist = [] edgetemp = subgraph.edges() if len(edgetemp) <= 2: for edge in edgetemp: finaledgelist.append(edge) else: for index in range(len(values)): if values[index] <= mean: edgelist.append(edges[index]) if float(len(edgelist)) / float( len(edges) ) <= head_tail_ratio: # change the head/tail division rule here, here is for tail percentage, # so if the rule is 40/60, the value should be assigned 0.6 as in the code. for edge in edgelist: finaledgelist.append(edge) else: Gsub = nx.Graph() for edge in edgelist: Gsub.add_edge(edge[0], edge[1]) try: __HeadTailCommunityDetection(Gsub, finaledgelist, head_tail_ratio) except: pass return finaledgelist
def avg_distance(graph: nx.Graph, communities: object, **kwargs: dict) -> object: """Average distance. The average distance of a community is defined average path length across all possible pair of nodes composing it. :param graph: a networkx/igraph object :param communities: NodeClustering object :param summary: boolean. If **True** it is returned an aggregated score for the partition is returned, otherwise individual-community ones. Default **True**. :return: If **summary==True** a FitnessResult object, otherwise a list of floats. Example: >>> from cdlib.algorithms import louvain >>> from cdlib import evaluation >>> g = nx.karate_club_graph() >>> communities = louvain(g) >>> scd = evaluation.avg_distance(g,communities) """ return __quality_indexes( graph, communities, lambda graph, coms: nx.average_shortest_path_length( nx.subgraph(graph, coms)), **kwargs)
def is_consistent(queue, G, reactions, trail): """Checks whether the trail also makes sense when between a metabolic pathway and the original gene neighborhood G. :param queue: multiprocessing queue for storing the result of the procedure :param G: undirected graph representing a genome, with genes for vertices (i.e. not reaction identifiers as are vertices in D). :param reactions: dict of dicts storing reaction information (obtained by parsing a KGML file) :param trail: CoMetGeNe trail :return: True if trail is consistent with respect to the gene neighborhood graph G, False otherwise """ G_nodes = G.nodes() gene_trail = list() for vertex in trail: gene_vertex = list() for gene in reactions[vertex]['enzyme']: if gene in G_nodes: gene_vertex.append(gene) gene_trail.append(gene_vertex) decompositions = decompose(gene_trail) for decomp in decompositions: for X in nx.connected_components(nx.subgraph(G, decomp)): skip = False for vertex in trail: if not skip: enzyme_names = reactions[vertex]['enzyme'] if len(set(X) & set(enzyme_names)) == 0: skip = True if not skip: queue.put(True) return queue.put(False)
def parse_graph_from_csv(file, largest_cc=False, skip_first_line=True): """ Parses a file from a SNAP Dataset and returns a networkx graph. Parameters ---------- file : file largest_cc : bool Whether the complete graph in the dataset should be returned, or only its largest connected component. """ graph_name = os.path.splitext(os.path.basename(file.name))[0] if skip_first_line: file.readline() # ignore first line `node_1,node_2` graph = nx.read_adjlist(file, delimiter=",", nodetype=int) # Change indices to start from zero if necessary if min(graph.nodes()) == 1: # print("Reducing indices by one to have them starting at zero.") graph = nx.relabel_nodes(graph, lambda x: x - 1, copy=False) if largest_cc: original_size = len(graph) largest_component = max(nx.connected_components(graph), key=len) subgraph = nx.subgraph(graph, largest_component) graph = nx.Graph(subgraph) if len(graph) < original_size: print( "Only considering largest connected component with %d nodes. Original graph had %d nodes." % (len(graph), original_size)) else: print("Graph has only one connected component.") return graph_name, graph
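# Illustrative sketch (not from the original module) of the largest-connected-component
# step used in parse_graph_from_csv, on a tiny hand-built graph; assumes only networkx.
import networkx as nx

g = nx.Graph([(0, 1), (1, 2), (3, 4)])             # two components: {0, 1, 2} and {3, 4}
largest_component = max(nx.connected_components(g), key=len)
lcc = nx.Graph(nx.subgraph(g, largest_component))   # copy so the result is mutable
print(sorted(lcc.nodes()))                          # [0, 1, 2]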
def has_skipped_vertices_G(trail, reactions, G): """Determines whether the given CoMetGene trail skips any genes. :param trail: CoMetGene trail :param reactions: dict of dicts storing reaction information (obtained by parsing a KGML file) :param G: undirected graph built on the same vertex set as the metabolic pathway (see Model in the methods section of Zaharia et al., 2018) :return: True if the CoMetGeNe trail was obtained skipping at least one gene, False otherwise """ involved = set() # genes involved in this trail for r_id in trail: for gene in reactions[r_id]['enzyme']: involved.add(gene) G_sub = nx.subgraph(G, trail) for v1, v2 in G_sub.edges(): if 'skipped' in G_sub[v1][v2]: for vertex in G_sub[v1][v2]['skipped']: if vertex not in involved: return True return False
def entire_workflow(m, steps, evaluate_function, weight_function): grid = nx.grid_graph([m, m]) for v in grid.nodes(): grid.node[v]["X"] = v[0] grid.node[v]["Y"] = v[1] grid.node[v]["pos"] = [v[0], v[1]] grid.graph["size"] = 0 grid.graph["steps"] = steps path = random_walk(grid, steps, True) boundary_nodes = [] for x in grid.nodes(): if grid.degree(x) <= 3: boundary_nodes.append(x) boundary = nx.subgraph(grid, boundary_nodes) restricted_path = restrict_path(boundary_nodes, path) weight_function(boundary) series = create_time_series(boundary, restricted_path, evaluate_function) return [series, restricted_path, boundary]
def erdos_renyi_modularity(graph, communities, **kwargs): """Erdos-Renyi modularity is a variation of the Newman-Girvan one. It assumes that vertices in a network are connected randomly with a constant probability :math:`p`. .. math:: Q(S) = \\frac{1}{m}\\sum_{c \\in S} (m_S − \\frac{mn_S(n_S −1)}{n(n−1)}) where :math:`m` is the number of graph edges, :math:`m_S` is the number of community edges, :math:`l_S` is the number of edges from nodes in S to nodes outside S. :param graph: a networkx/igraph object :param communities: NodeClustering object :return: FitnessResult object Example: >>> from cdlib.algorithms import louvain >>> from cdlib import evaluation >>> g = nx.karate_club_graph() >>> communities = louvain(g) >>> mod = evaluation.erdos_renyi_modularity(g,communities) :References: 1. Erdos, P., & Renyi, A. (1959). `On random graphs I. <https://gnunet.org/sites/default/files/Erd%C5%91s%20%26%20R%C3%A9nyi%20-%20On%20Random%20Graphs.pdf/>`_ Publ. Math. Debrecen, 6, 290-297. """ graph = convert_graph_formats(graph, nx.Graph) m = graph.number_of_edges() n = graph.number_of_nodes() q = 0 for community in communities.communities: c = nx.subgraph(graph, community) mc = c.number_of_edges() nc = c.number_of_nodes() q += mc - (m * nc * (nc - 1)) / (n * (n - 1)) return FitnessResult(score=(1 / m) * q)
def __init__(self, function, n_nodes=None, p_edges=None, max_iter_time=60 ): if n_nodes is None: self.n_nodes = [int(i) for i in [1e2, 3e2, 1e3]] else: self.n_nodes = n_nodes if p_edges is None: self.p_edges = [0.0005, 0.001] else: self.p_edges = p_edges self.function = function self.simulation_grid = self.expand_grid_local(self.n_nodes, self.p_edges) self._simulation_graphs = [nx.subgraph(graph, nbunch=max(nx.connected_components(graph), key=len)) for graph in self.random_graph_set(self.n_nodes, self.p_edges)] self.simulation_grid['n_edges'] = [graph.number_of_edges() for graph in self._simulation_graphs] self.computing_time = list() self.computing_time_df = self.simulation_grid.copy() self.max_iter_time = max_iter_time
def avg_transitivity(graph: nx.Graph, communities: object, **kwargs: dict) -> object: """Average transitivity. The average transitivity of a community is defined the as the average clustering coefficient of its nodes w.r.t. their connection within the community itself. :param graph: a networkx/igraph object :param communities: NodeClustering object :param summary: boolean. If **True** it is returned an aggregated score for the partition is returned, otherwise individual-community ones. Default **True**. :return: If **summary==True** a FitnessResult object, otherwise a list of floats. Example: >>> from cdlib.algorithms import louvain >>> from cdlib import evaluation >>> g = nx.karate_club_graph() >>> communities = louvain(g) >>> scd = evaluation.avg_transitivity(g,communities) """ return __quality_indexes( graph, communities, lambda graph, coms: nx.average_clustering(nx.subgraph(graph, coms)), **kwargs)
def hub_dominance(graph: nx.Graph, communities: object, **kwargs: dict) -> object: """Hub dominance. The hub dominance of a community is defined as the ratio of the degree of its most connected node w.r.t. the theoretically maximal degree within the community. :param graph: a networkx/igraph object :param communities: NodeClustering object :param summary: boolean. If **True** it is returned an aggregated score for the partition is returned, otherwise individual-community ones. Default **True**. :return: If **summary==True** a FitnessResult object, otherwise a list of floats. Example: >>> from cdlib.algorithms import louvain >>> from cdlib import evaluation >>> g = nx.karate_club_graph() >>> communities = louvain(g) >>> scd = evaluation.hub_dominance(g,communities) """ return __quality_indexes( graph, communities, lambda graph, coms: max( [x[1] for x in list(nx.degree(nx.subgraph(graph, coms)))]) / (len(coms) - 1), **kwargs)
def canonical_order(graph, face): ''' Outputs the coordinates of the nodes of the face in a canonical order in particular, the first one is the lex-min. You need to use the graph structure to make this work ''' lex_sorted_nodes = sorted(face) first_node = lex_sorted_nodes[0] cycle_sorted_nodes = [first_node] local_cycle = nx.subgraph(graph, face) #Compute the second node locally based on angle orientation v = first_node locations = [] neighbor_list = list(local_cycle.neighbors(v)) for w in neighbor_list: locations.append(graph.nodes[w]["pos"] - graph.nodes[v]["pos"]) angles = [float(np.arctan2(x[1], x[0])) for x in locations] neighbor_list.sort(key=dict(zip(neighbor_list, angles)).get) second_node = neighbor_list[0] cycle_sorted_nodes.append(second_node) ##Now compute a canonical ordering of local_cycle, clockwise, starting ##from first_node while len(cycle_sorted_nodes) < len(lex_sorted_nodes): v = cycle_sorted_nodes[-1] neighbor_list = list(local_cycle.neighbors(v)) neighbor_list.remove(cycle_sorted_nodes[-2]) cycle_sorted_nodes.append(neighbor_list[0]) return cycle_sorted_nodes
def _graphize_streets(self, highways, node_suffix=''): self.logger.debug('Creating street skeleton graph...') street_graph = nx.Graph() for h in highways: raw_ns = h.get_nodes() for n1, n2 in _walk2(raw_ns): # trimming streets out of box if not ((self.box[0] < n1.lat < self.box[2]) and (self.box[1] < n1.lon < self.box[3])): if not ((self.box[0] < n2.lat < self.box[2]) and (self.box[1] < n2.lon < self.box[3])): continue street_graph.add_node(str(n2.id) + node_suffix, pos=asarray([n1.lon, n1.lat]).astype(float), type='street') street_graph.add_node(str(n1.id) + node_suffix, pos=asarray([n2.lon, n2.lat]).astype(float), type='street') street_graph.add_edge(str(n2.id) + node_suffix, str(n1.id) + node_suffix, phases=3, type='street') # it may be possible that the streets are not connected. In that case, we only take the biggest component. # one could also choose to add nonexistent street edges or to find the minimum street path to connect # everything with openstreetmaps (dangerous). if not nx.is_connected(street_graph): self.logger.warning('The original graph is not connected. Only the biggest component will be kept.') subgraphs = [nx.subgraph(street_graph, n) for n in nx.connected_components(street_graph)] sizes = {len(x.nodes): x for x in subgraphs} biggest_sg = sizes[max(sizes.keys())] killnodes = [n for n in street_graph.nodes if n not in biggest_sg.nodes] street_graph.remove_nodes_from(killnodes) return street_graph
def show_subgraphs(ldag, context="edge", load_cache=True, graph=None): if graph is None: graph = ldag.graph l_buf = [] separator = "\n\n" iterobj = sorted(nx.connected_components(graph.to_undirected()), key=len, reverse=True) for sgid, nodes in enumerate(iterobj): if len(nodes) == 1: continue l_graph_buf = ["Subgraph {0} ({1} nodes)".format(sgid, len(nodes))] subg = nx.subgraph(graph, nodes) for edge in remove_edge_duplication(subg.edges(), ldag, graph=graph): msg = edge_view(edge, ldag, context=context, load_cache=load_cache, graph=graph) l_graph_buf.append(msg) l_buf.append("\n".join(l_graph_buf)) return separator.join(l_buf)
def caculateSST(G, fracs): KSD_DD = [0] * len(fracs) KSD_CCD = [0] * len(fracs) ND_DD = [0] * len(fracs) ND_CCD = [0] * len(fracs) # SDD_DD = [0] * len(fracs) # SDD_CCD = [0] * len(fracs) reNodes = SST(G) for j in range(len(fracs)): tempNodes = [] for node in reNodes: if len(tempNodes) >= fracs[j] * G.number_of_nodes(): break tempNodes.append(node) sampleG = nx.subgraph(G, tempNodes) dd2 = DD(sampleG) ccd2 = CCD(sampleG) KSD_DD[j] = KSD(dd1, dd2) KSD_CCD[j] = KSD(ccd1, ccd2) ND_DD[j] = ND(dd1, dd2) ND_CCD[j] = ND(ccd1, ccd2) # SDD_DD[j] = SDD(dd1, dd2) # SDD_CCD[j] = SDD(ccd1, ccd2) return KSD_DD, KSD_CCD, ND_DD, ND_CCD
def init(): global max_depth global T global grid_dim_x global grid_dim_y global base global route_length global total_distance global total_reward total_distance = 0 total_reward = 0 max_depth = 2 T = 3 grid_dim_x = 26 grid_dim_y = 26 base = 0.0 route_length = 9000 global graph global dist global sigma1 global avg_strat1 global regret1 global route df = pd.read_csv("data/paws_mdp_out.txt", sep=" ") dist = pd.read_csv("data/dist.gop", sep=" ", header=None) graph = \ nx.from_pandas_dataframe(df, source='node_from', target='node_to', edge_attr=['distance', 'animal_density', 'grid_cell_x', 'grid_cell_y']) graph = nx.subgraph(graph, nx.node_connected_component(graph, base)) sigma1 = [[1/(grid_dim_x*grid_dim_y)] * grid_dim_y] * grid_dim_x avg_strat1 = [[0] * grid_dim_y] * grid_dim_x regret1 = [[0] * grid_dim_y] * grid_dim_x route = []
def get_stat_datasets(): # path = '/network/rit/lab/ceashpc/share_data/GraphOpt/datasets/DBLP/DBLP_Citation_2014_May/DM' path = '/network/rit/lab/ceashpc/share_data/GraphOpt/datasets/DBLP/DBLP_Citation_2014_May/DB' # fn = 'dm_top30000_dataset.pkl' fn = 'db_top30000_dataset.pkl' with open(os.path.join(path, fn), 'rb') as rfile: dataset = pickle.load(rfile) first_graph = dataset['first_graph'] second_graph = dataset['second_graph'] print(nx.density(first_graph)) print(nx.density(second_graph)) print(first_graph.number_of_nodes()) print(first_graph.number_of_edges()) print(second_graph.number_of_nodes()) print(second_graph.number_of_edges()) print(nx.is_connected(first_graph)) print(nx.is_connected(second_graph)) lcc = max(nx.connected_component_subgraphs(second_graph), key=len) subgraph = nx.subgraph(second_graph, lcc) print(subgraph.number_of_nodes()) print(subgraph.number_of_edges())
def set_splice_graph_old(self, sg, component, target): self.graph = sg.get_graph() self.tx_paths = sg.annotation self.original_tx_paths = sg.annotation # tx paths all ways without trimming known_edges = set([(tx[i], tx[i + 1]) for tx in self.tx_paths for i in range(len(tx) - 1)]) self.component = component self.target = target self.sub_graph = nx.subgraph(self.graph, self.component) # add any possible tx that uses novel edges to list of known txs for tx in nx.all_simple_paths(self.sub_graph, source=self.component[0], target=self.component[-1]): novel = False for i in range(len(tx) - 1): if (tx[i], tx[i + 1]) not in known_edges: novel = True if novel: self.tx_paths.append(tx) self.inc_lengths, self.skip_lengths = [], [ ] # call set all_path_lengths method self.all_path_coordinates = [] # call set_all_path_coordinates method
for its in range(1, 100): et = [0, 0] while et[0] == et[1]: ed = random.choice(el) et = [ int(df.loc[df["ID"] == ed[0], "newtree" + str(its - 1)]), int(df.loc[df["ID"] == ed[1], "newtree" + str(its - 1)]) ] sgn = [] for n in graph.nodes(): if int(df.loc[df["ID"] == n, "newtree" + str(its - 1)]) in et: sgn.append(n) sgraph = nx.subgraph(graph, sgn) edd = {0: et[0], 3: et[1]} newtree_partial = recursive_tree_part(sgraph, 2, "POP10", .05, 2) newtree = {} for n in graph.nodes(): if n not in sgn: newtree[n] = int(df.loc[df["ID"] == n, "newtree" + str(its - 1)]) else: newtree[n] = edd[newtree_partial[n]] df["newtree" + str(its)] = df["ID"].map(newtree) #df["newtree"+str(its)]=pd.to_numeric(df["newtree"+str(its)]) df.plot(column="newtree" + str(its), cmap="tab20")
def setup(self, **keywords): """ 2012.10.15 run before anything is run """ AbstractMatrixFileWalker.setup(self, **keywords) #self.writer = BeagleGenotypeFile(path=self.outputFname, mode='w') #read in the IBD check result self.ibdData = SNP.readAdjacencyListDataIntoMatrix(inputFname=self.pedigreeKinshipFilePath, \ rowIDHeader=None, colIDHeader=None, \ rowIDIndex=0, colIDIndex=1, \ dataHeader=None, dataIndex=2, hasHeader=False) #. read in the alignment coverage data alignmentCoverageFile = MatrixFile( path=self.individualAlignmentCoverageFname) alignmentCoverageFile.constructColName2IndexFromHeader() alignmentReadGroup2coverageLs = alignmentCoverageFile.constructDictionary( keyColumnIndexList=[0], valueColumnIndexList=[1]) alignmentCoverageFile.close() sys.stderr.write( "Reading in all samples from %s VCF input files ... \n" % (len(self.inputFnameLs))) # read all the Beagle files individualID2HaplotypeData = {} for inputFname in self.inputFnameLs: vcfFile = VCFFile(inputFname=inputFname) #vcfFile.readInAllHaplotypes() for individualID in vcfFile.getSampleIDList(): individualID2HaplotypeData[individualID] = None #haplotypeList = vcfFile.getHaplotypeListOfOneSample(individualID) #individualID2HaplotypeData[individualID] = PassingData(haplotypeList=haplotypeList, # locusIDList=vcfFile.locusIDList) # get all haplotypes , etc. # get all sample IDs sys.stderr.write("%s individuals total.\n" % (len(individualID2HaplotypeData))) #. read in the pedigree or deduce it from Beagle Trio/Duo genotype file (columns) #. construct individualID2pedigreeContext, context: familySize=1/2/3, familyPosition=1/2 (parent/child) sys.stderr.write("Constructing individualID2pedigreeContext ...") plinkPedigreeFile = PlinkPedigreeFile(path=self.pedigreeFname) pGraph = plinkPedigreeFile.pedigreeGraph #shrink the graph to only individuals with data pGraph = nx.subgraph(pGraph, individualID2HaplotypeData.keys()) cc_subgraph_list = nx.connected_component_subgraphs( pGraph.to_undirected()) individualID2familyContext = {} outDegreeContainer = NumberContainer(minValue=0) familySizeContainer = NumberContainer(minValue=0) individualCoverageContainer = NumberContainer(minValue=0) familyCoverageContainer = NumberContainer(minValue=0) for cc_subgraph in cc_subgraph_list: familySize = len(cc_subgraph) familySizeContainer.addOneValue(familySize) familyCoverage = 0 for n in cc_subgraph: #assuming each family is a two-generation trio/nuclear family individualCoverage = self.getIndividualCoverage( individualID=n, alignmentReadGroup2coverageLs=alignmentReadGroup2coverageLs ) individualCoverage = float(individualCoverage) individualCoverageContainer.addOneValue(individualCoverage) familyCoverage += individualCoverage in_degree = pGraph.in_degree(n) out_degree = pGraph.out_degree(n) outDegreeContainer.addOneValue(out_degree) familyContext = PassingData(familySize=familySize, in_degree=in_degree, out_degree=out_degree, \ individualCoverage=individualCoverage,\ familyCoverage=None) if n not in individualID2familyContext: individualID2familyContext[n] = familyContext else: sys.stderr.write( "Node %s already in individualID2familyContext.\n" % (n)) familyCoverageContainer.addOneValue(familyCoverage) #set the family coverage for each member, used in weighing the individual. 
better covered family => better haplotype for n in cc_subgraph: individualID2familyContext[n].familyCoverage = familyCoverage plinkPedigreeFile.close() sys.stderr.write("%s individuals.\n" % (len(individualID2familyContext))) # weigh each unique individual based on its sequencing coverage + no of offspring => probability mass for each individual sys.stderr.write( "Weighing each individual , assigning probability mass ...") individualID2probabilityMass = {} for individualID, familyContext in individualID2familyContext.items(): outDegreeQuotient = outDegreeContainer.normalizeValue( familyContext.familySize) individualCoverageQuotient = individualCoverageContainer.normalizeValue( familyContext.individualCoverage) #familyCoverageQuotient = familyCoverageContainer.normalizeValue(familyContext.familyCoverage) importanceScore = outDegreeQuotient + individualCoverageQuotient representativeImportanceScore = importanceScore individualID2probabilityMass[ individualID] = representativeImportanceScore sys.stderr.write(" %s IDs with probability mass assigned.\n" % (len(individualID2probabilityMass))) self.individualID2probabilityMass = individualID2probabilityMass self.individualID2HaplotypeData = individualID2HaplotypeData
cache[fingerprint] = (mcost, mpath) return (mcost, mpath) main(None) #wrapper(main) import pygraphviz from networkx.drawing.nx_agraph import write_dot #print(G.edges(data=True)) #print(G.nodes) #labels = nx.get_edge_attributes(G,'weight') #nx.draw_networkx_edge_labels(G,pos=nx.spring_layout(G),edge_labels=labels) cnt = 0 cache = dict() write_dot(G, "maze.dot") nx.draw_spring(G, with_labels=True) plt.savefig("maze_nwx.png") # cut the graph into the four distinct different graphs graphs = list() for i in range(0, 4): graphs.append(nx.subgraph(G, nx.node_connected_component(G, "@" + str(i)))) cache = dict() print(findtheway(graphs, ["@0", "@1", "@2", "@3"]))
def proj_mp(graph, weight, A, sparsity, lmbd, max_iter=10, epsilon=1e-3): current_x = proj_init_point(graph.number_of_nodes()) + 1e-6 edges = np.array(graph.edges) edge_weights = np.ones(graph.number_of_edges()) start_time = time.time() for i in range(max_iter): print('iter {}'.format(i)) iter_time = time.time() gradient = proj_get_gradient(current_x, weight, A, lmbd) normalized_gradient = normalize_gradient(current_x, gradient) re_head = head_proj(edges=edges, weights=edge_weights, x=normalized_gradient, g=1, s=sparsity, budget=sparsity - 1., delta=1. / 169., err_tol=1e-8, max_iter=100, root=-1, pruning='strong', epsilon=1e-10, verbose=0) re_nodes, _, _ = re_head gamma_x = set(re_nodes) print('gamma_x', len(gamma_x)) # print(sorted(list(gamma_x))) if i == 0: supp_x = set() else: supp_x = set( [ind for ind, _ in enumerate(current_x) if not 0. == _]) omega_x = gamma_x | supp_x print('omega_x', len(omega_x)) # print(sorted(list(omega_x))) # print(gradient[sorted(list(gamma_x))]) # print(gradient[sorted(list(supp_x))]) bx = proj_argmax(current_x, omega_x, weight, A, lmbd, max_iter=2000, learning_rate=0.01) re_tail = tail_proj(edges=edges, weights=edge_weights, x=bx, g=1, s=sparsity, budget=sparsity - 1., nu=2.5, max_iter=100, err_tol=1e-8, root=-1, pruning='strong', verbose=0) re_nodes, _, _ = re_tail psi_x = set(re_nodes) prev_x = current_x current_x = np.zeros_like(current_x) current_x[list(psi_x)] = bx[list(psi_x)] print('psi_x', len(np.nonzero(current_x)[0])) diff_norm_x = np.linalg.norm(current_x - prev_x) func_val, xtw, dense_term = proj_get_func_val(current_x, weight, A, lmbd) print( 'iter {}, func val: {:.5f}, iter_time: {:.5f}, diff_norm: {:.5f}'. format(i, func_val, time.time() - iter_time, diff_norm_x)) subgraph = set(np.nonzero(current_x)[0]) print('subgraph density', nx.density(nx.subgraph(graph, subgraph))) if diff_norm_x <= epsilon: break run_time = time.time() - start_time func_val, xtw, dense_term = proj_get_func_val(current_x, weight, A, lmbd) print('final function value: {:.5f}'.format(func_val)) print('run time of whole algorithm: {:.5f}'.format(run_time)) subgraph = set(np.nonzero(current_x)[0]) return subgraph
cliques = nx.find_cliques(G) # Count and print the number of maximal cliques in G print(len(list(cliques))) ############################## Task 2 (Finding cliques (II)) # Import necessary modules import networkx as nx from nxviz import CircosPlot import matplotlib.pyplot as plt # Find the author(s) that are part of the largest maximal clique: largest_clique largest_clique = sorted(nx.find_cliques(G), key=lambda x: len(x))[-1] # Create the subgraph of the largest_clique: G_lc G_lc = nx.subgraph(G, largest_clique) # Create the CircosPlot object: c c = CircosPlot(G_lc) # Draw the CircosPlot to the screen c.draw() plt.show() ############################## Final Task ########################### # Find important users : nx.degree_centrality() # Find largest communities of collaborators : max_cliques() # recommmendation system : open triangle ############################## Task 1 (Finding important collaborators) # Compute the degree centralities of G: deg_cent
def inducer(graph, node):
    # induced subgraph on a node plus its neighborhood, together with the total degree
    # (in the original graph) of the nodes in that neighborhood
    nebs = list(nx.neighbors(graph, node))
    sub_nodes = nebs + [node]
    sub_g = nx.subgraph(graph, sub_nodes)
    out_counts = np.sum([len(list(nx.neighbors(graph, x))) for x in sub_nodes])
    return sub_g, out_counts, nebs
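# Illustrative usage sketch for inducer (not from the original project); assumes the
# function above plus networkx as nx and numpy as np. On a star graph centered at 0,
# the induced neighborhood contains every node and out_counts sums all degrees.
import networkx as nx
import numpy as np

g = nx.star_graph(3)                  # edges: (0, 1), (0, 2), (0, 3)
sub_g, out_counts, nebs = inducer(g, 0)
print(sorted(sub_g.nodes()))          # [0, 1, 2, 3]
print(out_counts)                     # 6 (degree 3 for the hub + 1 for each leaf)
print(sorted(nebs))                   # [1, 2, 3]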
def optimize(instance, sparsity, threshold, trade_off, learning_rate, max_iter, epsilon=1e-3, logger=None):
    first_graph = instance['first_graph']
    second_graph = instance['second_graph']
    true_subgraph = instance['true_subgraph']
    features = instance['weight']
    A = adj_matrix(second_graph)  # adjacency matrix of the second graph, used for the density projection
    first_graph_edges = np.array(first_graph.edges)
    first_graph_edge_weights = np.ones(first_graph.number_of_edges())  # edge weights, default 1
    print('number of nodes in first graph', first_graph.number_of_nodes())
    print('number of nodes in second graph', second_graph.number_of_nodes())
    if first_graph.number_of_nodes() != second_graph.number_of_nodes():
        raise ValueError('error, wrong dual network input !!!')
    num_nodes = first_graph.number_of_nodes()
    num_edges_first_graph = first_graph.number_of_edges()
    num_edges_second_graph = second_graph.number_of_edges()
    if logger:  # log some basic information
        logger.debug('-' * 5 + ' related info ' + '-' * 5)
        logger.debug('algorithm: graph block-structured GHTP')
        logger.debug('sparsity: {:d}'.format(sparsity))
        logger.debug('max iteration: {:d}'.format(max_iter))
        logger.debug('number of nodes: {:d}'.format(num_nodes))
        logger.debug('number of edges in first graph: {:d}'.format(num_edges_first_graph))
        logger.debug('number of edges in second graph: {:d}'.format(num_edges_second_graph))
        logger.debug('density of first graph: {:.5f}'.format(nx.density(first_graph)))
        logger.debug('density of second graph: {:.5f}'.format(nx.density(second_graph)))
        logger.debug('density of true subgraph in second graph: {:.5f}'.format(
            nx.density(nx.subgraph(second_graph, true_subgraph))))
        logger.debug('-' * 5 + ' start iterating ' + '-' * 5)

    start_time = time.time()
    acc_proj_time = 0.
    func = DualEMS(features, trade_off)
    if logger:
        print(sorted(true_subgraph))
        true_x = np.zeros(num_nodes)
        true_x[list(true_subgraph)] = 1.
        true_obj_val, x_ems_val, y_ems_val, penalty = func.get_obj_val(true_x, true_x)
        print('ground truth values: {}, {}, {}, {}'.format(true_obj_val, x_ems_val, y_ems_val, penalty))

    current_x, current_y = func.get_init_x_zeros()  # are there better initialization methods?
    current_x += 1e-6  # start from zero; add 1e-6 to avoid divide-by-zero errors
    current_y += 1e-6
    print('iteration start funval', func.get_obj_val(current_x, current_y))

    for iter in range(max_iter):  # outer iteration
        if logger:
            logger.debug('iteration: {:d}'.format(iter))
        prev_x, prev_y = np.copy(current_x), np.copy(current_y)  # keep previous vectors for early termination

        # head projection for x on the first graph (connectivity constraint)
        grad_x = func.get_gradient(current_x, current_y)
        iter_proj_time = 0.
        if iter == 0:  # start from the all-zero vector
            norm_grad_x = normalize_gradient(np.zeros_like(current_x), grad_x)
        else:
            norm_grad_x = normalize_gradient(current_x, grad_x)
        start_proj_time = time.time()
        re_head = head_proj(edges=first_graph_edges, weights=first_graph_edge_weights, x=norm_grad_x,
                            g=1, s=sparsity, budget=sparsity - 1., delta=1. / 169., max_iter=100,
                            err_tol=1e-8, root=-1, pruning='strong', epsilon=1e-10, verbose=0)
        re_nodes, _, _ = re_head
        iter_proj_time += (time.time() - start_proj_time)
        print('head projection time for x: {:.5f}'.format(time.time() - start_proj_time))
        gamma_x = set(re_nodes)
        indicator_x = np.zeros(num_nodes)
        indicator_x[list(gamma_x)] = 1.
        # there is no difference between using grad_x and norm_grad_x here,
        # because indicator_x is obtained from norm_grad_x
        if iter == 0:
            tmp_x = np.zeros_like(current_x) + learning_rate * norm_grad_x * indicator_x  # start from all zeros
        else:
            tmp_x = current_x + learning_rate * norm_grad_x * indicator_x
        omega_x = set([ind for ind, _ in enumerate(tmp_x) if not 0. == _])

        # head projection for y on the second graph (density constraint)
        grad_y = func.get_gradient(current_y, current_x)  # note the reversed order of x and y
        if iter == 0:
            norm_grad_y = normalize_gradient(np.zeros_like(current_y), grad_y)  # note: is normalization necessary for the density projection?
        else:
            norm_grad_y = normalize_gradient(current_y, grad_y)
        # norm_grad_y = grad_y  # variant without normalization
        # the input of the density projection should be non-negative: a larger absolute
        # gradient value means a larger effect on the objective function
        abs_norm_grad_y = np.absolute(norm_grad_y)
        np.set_printoptions(linewidth=3000)
        # lmbd_list = [0.01, 0.05, 0.07, 0.08, 0.09, 0.1, 0.12, 0.15, 0.17, 0.2, 0.2, 0.2, 0.2, 0.21, 0.22, 0.23, 0.18, 0.18, 0.18, 0.17]
        lmbd_list = [0.23]
        # sparsity_list = [250, 250, 260, 270, 270, 275, 275, 280, 280, 280, 265, 270, 275, 275, 280, 285, 260, 255, 250, 245]
        sparsity_list = [275]
        lmbd_sparsity_list = list(zip(lmbd_list, sparsity_list))  # materialize; the list is reused for the tail projection
        print('start head projection for y')
        start_proj_time = time.time()
        gamma_y = density_projection(second_graph, abs_norm_grad_y, A, threshold, lmbd_sparsity_list,
                                     normalize=True, true_subgraph=true_subgraph)
        iter_proj_time += (time.time() - start_proj_time)
        print('head projection time for y: {:.5f}'.format(time.time() - start_proj_time))
        indicator_y = np.zeros(num_nodes)
        indicator_y[list(gamma_y)] = 1.
        if iter == 0:
            tmp_y = np.zeros_like(current_y) + learning_rate * norm_grad_y * indicator_y  # the update uses the normalized gradient
        else:
            tmp_y = current_y + learning_rate * norm_grad_y * indicator_y
        omega_y = set([ind for ind, _ in enumerate(tmp_y) if not 0. == _])

        # solve the restricted argmax over the joint support
        print('solve argmax')
        start_max_time = time.time()
        bx, by = func.argmax_obj_with_proj(current_x, current_y, omega_x, omega_y)
        print('solve argmax time {:.5f}'.format(time.time() - start_max_time))

        # tail projection for x on the first graph
        start_proj_time = time.time()
        re_tail = tail_proj(edges=first_graph_edges, weights=first_graph_edge_weights, x=bx, g=1, s=sparsity,
                            budget=sparsity - 1., nu=2.5, max_iter=100, err_tol=1e-8, root=-1,
                            pruning='strong', verbose=0)
        re_nodes, _, _ = re_tail
        iter_proj_time += time.time() - start_proj_time
        print('tail projection time for x: {:.5f}'.format(time.time() - start_proj_time))
        psi_x = set(re_nodes)
        current_x = np.zeros_like(current_x)
        current_x[list(psi_x)] = bx[list(psi_x)]
        current_x = normalize(current_x)

        # tail projection for y via the density projection;
        # by is already in [0, 1], so it is neither normalized nor taken in absolute value
        start_proj_time = time.time()
        psi_y = density_projection(second_graph, by, A, threshold, lmbd_sparsity_list,
                                   normalize=False, true_subgraph=true_subgraph)
        iter_proj_time += (time.time() - start_proj_time)
        print('tail projection time for y: {:.5f}'.format(time.time() - start_proj_time))
        current_y = np.zeros_like(current_y)
        current_y[list(psi_y)] = by[list(psi_y)]
        current_y = normalize(current_y)

        print('{} iteration funval'.format(iter), func.get_obj_val(current_x, current_y))
        acc_proj_time += iter_proj_time
        if logger:
            print('iter proj time: {:.5f}'.format(iter_proj_time))

        # early termination when both vectors stop moving
        diff_norm = np.sqrt(np.linalg.norm(current_x - prev_x) ** 2 + np.linalg.norm(current_y - prev_y) ** 2)
        if logger:
            logger.debug('difference norm: {}'.format(diff_norm))
        if diff_norm < epsilon:
            break

    run_time = time.time() - start_time
    return current_x, current_y, run_time
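The per-iteration pattern in optimize is: project the gradient onto a node set, take a step restricted to that set, then read off the support. Below is a minimal numpy-only sketch of that pattern; the function name restricted_gradient_step and the hard-coded node set are made up for illustration and do not call the real head_proj or density_projection oracles.

import numpy as np

def restricted_gradient_step(x, grad, selected_nodes, learning_rate=0.1):
    """Gradient step restricted to the support chosen by a projection oracle."""
    indicator = np.zeros_like(x)
    indicator[list(selected_nodes)] = 1.0                # mask from the projected node set
    tmp = x + learning_rate * grad * indicator           # step only on the masked coordinates
    omega = {i for i, v in enumerate(tmp) if v != 0.0}   # support of the intermediate iterate
    return tmp, omega

# toy usage with a fabricated "projection" result
x = np.zeros(6)
grad = np.array([0.5, -0.2, 0.0, 0.9, 0.1, -0.4])
tmp, omega = restricted_gradient_step(x, grad, selected_nodes={0, 3, 5})
print(tmp, sorted(omega))   # only indices 0, 3 and 5 can become non-zero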
def connected_components(graph, nbunch):
    g = nx.subgraph(graph, nbunch)
    ccomponents = nx.connected_components(g)
    return ccomponents
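A quick usage sketch for the wrapper above; the graph and the node subset are invented for illustration. It restricts the graph to a node subset and then lists the connected components inside that subset.

import networkx as nx

G = nx.Graph([(0, 1), (1, 2), (3, 4), (5, 6)])
comps = connected_components(G, [0, 1, 2, 3, 4])
print([sorted(c) for c in comps])   # [[0, 1, 2], [3, 4]] (component order may vary)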
def draw_graph(self, ax=None, node_size=8):
    """ Draws the state graph of the maze in a fancy way. """
    if ax is None:
        fig, ax = plt.subplots(1)
    graph = self.compute_graph()
    d = nx.shortest_path(graph, graph.start)
    p_goal = d[graph.goal]
    d_goal = len(p_goal)
    subgraph = nx.subgraph(graph, d.keys())
    n_levels = max(map(len, d.values()))
    nodespos = dict()
    max_nodes_in_level = 0
    nodes = [graph.start]
    nodespos[graph.start] = [-0.5, 0]
    for i in range(1, n_levels + 2):
        nodes = [[n for n in nx.DiGraph.successors(graph, m) if len(d[n]) == i + 1]
                 for m in nodes]
        nodes = sum(nodes, [])
        # deduplicate while preserving order
        seen = set()
        seen_add = seen.add
        nodes = [n for n in nodes if n not in seen and not seen_add(n)]
        # place the nodes of the solution on the left
        l = [n for n in nodes if n in p_goal]
        if l != []:
            nodes.remove(l[0])
            nodes = [l[0]] + nodes
        if len(nodes) > max_nodes_in_level:
            max_nodes_in_level = len(nodes)
        for j, n in enumerate(nodes):
            nodespos[n] = [j - 1.0 * len(nodes) / 2, -i]
    main_edges = [(n1, n2) for (n1, n2) in subgraph.edges() if (len(d[n2]) - len(d[n1]) == 1)]
    other_edges = [e for e in subgraph.edges() if e not in main_edges]
    nx.draw_networkx_edges(nx.Graph(subgraph), edgelist=other_edges, ax=ax, pos=nodespos, width=0.5, alpha=.4)
    nx.draw_networkx_edges(nx.Graph(subgraph), edgelist=main_edges, ax=ax, pos=nodespos, width=2)
    nx.draw_networkx_nodes(subgraph, pos=nodespos, ax=ax, node_size=node_size)
    for n, col in ([graph.start, 'g'], [graph.goal, 'b']):
        nx.draw_networkx_nodes(graph, nodespos, [n], ax=ax, node_size=1.5 * node_size, node_color=col)
    ax.set_ylim(-n_levels, 1)
    ax.set_xlim(-1.0 * max_nodes_in_level / 2 - 1, 1.0 * max_nodes_in_level / 2)
    ax.set_axis_off()
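The layout logic above amounts to placing every node on a row given by its BFS distance from the start node. Here is a self-contained sketch of that idea on a toy graph; it does not use the maze class, and the balanced tree merely stands in for a state graph.

import networkx as nx
import matplotlib.pyplot as plt

G = nx.balanced_tree(2, 3)                 # toy stand-in for a maze state graph
dist = nx.shortest_path_length(G, 0)       # BFS distance of every node from node 0
levels = {}
for node, d in dist.items():
    levels.setdefault(d, []).append(node)

pos = {}
for d, nodes in levels.items():
    for j, node in enumerate(sorted(nodes)):
        pos[node] = (j - len(nodes) / 2.0, -d)   # one row per distance level

fig, ax = plt.subplots(1)
nx.draw_networkx(G, pos=pos, ax=ax, node_size=30, with_labels=False)
ax.set_axis_off()
plt.show()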
def compute_score(self):
    """ Computes a score for the maze.

    This is an example of a scoring function which implements a few of
    my favorite criteria for a good maze:

    - The solution is unique and long.
    - There are plenty of loops and dead ends.
    - Many openings, and many false endings which make the maze
      difficult to solve backwards.

    This is all very subjective and incomplete. To grow mazes according
    to other criteria, overwrite this function in a subclass of Vmaze.
    """
    score = 1.0
    graph = self.compute_graph()
    if len(graph.nodes()) == 0:
        return 0
    shortest = nx.shortest_path(graph, graph.start)
    # Check that the goal is reachable at all.
    if graph.goal not in shortest.keys():
        return 0
    # Check that there is exactly one solution.
    gen = nx.all_simple_paths(graph, graph.start, graph.goal)
    next(gen)
    try:
        next(gen)
    except StopIteration:
        pass
    else:
        return 1.0
    distances = {n: len(p) for n, p in shortest.items()}
    path_goal = shortest[graph.goal]
    d_goal = distances[graph.goal]
    subgraph = nx.subgraph(graph, shortest.keys())
    # Reward the length of the minimal solution
    score *= d_goal ** 2
    # Reward the openings, and the states that are nearer
    # than the solution in general
    openings_1 = len([d for n, d in distances.items() if d <= 2])
    openings_2 = len([d for n, d in distances.items() if d <= d_goal])
    score *= (openings_1 * openings_2 ** 2)
    # Reward the "loops"
    n_loops = len([e for e in subgraph.edges()
                   if abs(distances[e[0]] - distances[e[1]]) > 1])
    score *= (n_loops ** 0.5)
    # Reward the false endings
    paths_to_end = nx.shortest_path(graph, target=graph.goal)
    n_endings = len([path for path in paths_to_end.values() if len(path) < 3])
    score *= n_endings
    return score
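The "loops" criterion above counts edges of the reachable subgraph whose endpoints sit more than one BFS level apart. A standalone illustration on a tiny directed toy graph (no Vmaze involved; the start node is chosen arbitrarily):

import networkx as nx

G = nx.DiGraph([(0, 1), (1, 2), (2, 3), (3, 0)])    # directed cycle: the edge 3 -> 0 closes a loop
start = 0
dist = nx.shortest_path_length(G, start)             # BFS level of every reachable state
reachable = nx.subgraph(G, list(dist.keys()))
loop_edges = [e for e in reachable.edges() if abs(dist[e[0]] - dist[e[1]]) > 1]
print(loop_edges)   # [(3, 0)]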
def simulated_annealing(graph, num_buses, size_bus, constraints, test_mode=False):
    all_names = list(graph.nodes())   # make the node view indexable
    x = all_names[0]
    print(graph[x])
    print(graph[x]['weight'])
    CD = constraint_dictionary(all_names, constraints)
    # assignment = random_assignment(all_names, num_buses, size_bus)
    assignment = greedy(graph, num_buses, size_bus, constraints)
    groups = assignment_to_groups(assignment)
    assert is_valid(graph, num_buses, size_bus, assignment)
    T0 = 1.0
    iterations = 5000
    student_status = calculate_student_status(graph, constraints, assignment)
    starting_score = evaluate_assignment(graph, constraints, assignment)
    current_score = starting_score
    for i in range(iterations):
        T = T0 * (1 - i / iterations)   # linear cooling schedule
        swap = propose_swap(assignment, num_buses, size_bus, groups=groups)
        if swap is None:
            continue
        val, student_status_swap = swap_value(graph, constraints, CD, student_status, assignment, swap)
        # always accept improving swaps; accept worsening swaps with probability exp(val / T)
        if val >= 0 or random.random() < np.exp(val / T):
            group_A, A, group_B, B = swap
            if A is not None:
                assignment[A] = group_B
                groups[group_B].append(A)
                groups[group_A].remove(A)
            assignment[B] = group_A
            groups[group_A].append(B)
            groups[group_B].remove(B)
            student_status = student_status_swap
            if test_mode:
                new_score = evaluate_assignment(graph, constraints, assignment)
                student_status_check = calculate_student_status(graph, constraints, assignment)
                for s in student_status_check:
                    assert student_status[s] == student_status_check[s]
                if new_score != current_score + val:
                    print(new_score, current_score, val)
                    print(swap)
                    print(student_status)
                    print(assignment_to_groups(assignment))
                    print(constraints)
                    nodes = []
                    for x in assignment:
                        if assignment[x] == group_A or assignment[x] == group_B:
                            nodes.append(x)
                    import matplotlib.pyplot as plt
                    subgraph = nx.subgraph(graph, nodes)
                    nx.draw(subgraph, with_labels=True, font_weight='bold')
                    plt.show()
                    assert False
                current_score = new_score
    assert is_valid(graph, num_buses, size_bus, assignment)
    # print('Score:', evaluate_assignment(graph, constraints, assignment))
    return assignment
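The acceptance rule above is the usual Metropolis criterion combined with a linear cooling schedule. Here is a self-contained toy version on a one-dimensional objective; the function anneal_toy and its parameters are illustrative and not part of the bus-assignment code.

import random
import numpy as np

def anneal_toy(f, x0, step=1.0, T0=1.0, iterations=2000):
    """Maximize f by simulated annealing with linear cooling."""
    x, best = x0, x0
    for i in range(iterations):
        T = T0 * (1 - i / iterations) + 1e-9        # linear cooling, kept strictly positive
        candidate = x + random.uniform(-step, step)
        val = f(candidate) - f(x)                    # value change of the proposed move
        if val >= 0 or random.random() < np.exp(val / T):
            x = candidate
            if f(x) > f(best):
                best = x
    return best

print(anneal_toy(lambda x: -(x - 3.0) ** 2, x0=0.0))   # should end up near 3.0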
def _build_list(self):
    print "using disconnectivity analysis to find minima to untrap"
    self.minpairs = deque()
    graph = TSGraph(self.database).graph
    cclist = list(nx.connected_components(graph))
    # get the largest cluster
    group1 = cclist[0]
    min1 = sorted(group1, key=lambda m: m.energy)[0]
    if not min1 == self.database.minima()[0]:
        # make sure that the global minimum is in group1
        print "warning, the global minimum is not in the largest cluster."
    # compute the energy barriers for all minima in the cluster
    subgraph = nx.subgraph(graph, group1)
    energy_barriers = self._compute_barriers(subgraph, min1)
    # sort the minima by the barrier height divided by the energy difference
    weights = [(m, np.abs(barrier) / np.abs(m.energy - min1.energy))
               for (m, barrier) in energy_barriers.iteritems()]
    weights.sort(key=lambda v: 1. / v[1])
    self.minpairs = deque()
    for min2, w in weights:
        if len(self.minpairs) > self.list_len:
            break
        if not self.is_good_pair(min1, min2):
            continue
        self.minpairs.append((min1, min2))
        if True:  # print some stuff
            print "  untrap analysis: minimum", min2._id, "with energy", min2.energy, "barrier", energy_barriers[min2], "untrap weight", w
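The selection logic above ranks candidate minima by |barrier| / |ΔE| relative to the global minimum and keeps the highest-weighted pairs. A small standalone sketch of that ranking with made-up names, energies and barriers (no database or TSGraph involved):

from collections import deque
import numpy as np

global_energy = -10.0
# candidate minima: (name, energy, barrier height separating them from the global minimum)
candidates = [('m1', -9.5, 2.0), ('m2', -7.0, 1.0), ('m3', -9.9, 0.5)]

weights = [(name, np.abs(barrier) / np.abs(energy - global_energy))
           for name, energy, barrier in candidates]
weights.sort(key=lambda v: 1. / v[1])    # largest untrap weight first

list_len = 2
minpairs = deque()
for name, w in weights:
    if len(minpairs) > list_len:
        break
    minpairs.append(('global_min', name))
print(list(minpairs), weights)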