def read_graph(filename, gt_file):
    """Build a directed graph from an edge file and annotate it with alignments.

    Each non-empty line of ``filename`` is whitespace-split; fields 0 and 1
    are read identifiers, fields 2 and 3 are the suffixes appended to them
    (``<id>_<suffix>``) to form node names, and field 4 is the integer
    ``active`` flag of the second node.  ``gt_file`` is a JSON mapping from
    read identifier to a list whose first entry holds two coordinates; the
    smaller becomes ``aln_start`` and the larger ``aln_end``.

    The graph is written to ``<prefix>_hgraph.graphml`` and the numbers of
    weakly and strongly connected components are printed.

    Parameters
    ----------
    filename : str
        Path of the edge list.
    gt_file : str
        Path of the JSON alignment file.
    """
    with open(gt_file) as handle:
        read_dict = json.load(handle)

    def alignment_span(read_id):
        # Reads absent from the JSON file get a (0, 0) placeholder span.
        if read_id not in read_dict:
            return 0, 0
        first, second = read_dict[read_id][0][0], read_dict[read_id][0][1]
        return min(first, second), max(first, second)

    g = nx.DiGraph()
    with open(filename) as handle:
        for line in handle:
            fields = line.split()
            if not fields:
                # Blank lines used to raise IndexError; skip them instead.
                continue
            src = fields[0] + "_" + fields[2]
            dst = fields[1] + "_" + fields[3]
            src_start, src_end = alignment_span(fields[0])
            dst_start, dst_end = alignment_span(fields[1])
            # add_node(name, **attrs) creates or updates the node and works on
            # every networkx release, unlike ``g.node[...]`` (removed in 2.4).
            # The assignment order mirrors the original so that a self-loop
            # line (src == dst) ends with the same attribute values.
            g.add_node(src, aln_start=src_start, aln_end=src_end)
            g.add_node(dst, aln_start=dst_start, aln_end=dst_end)
            g.add_node(src, active=1)
            g.add_node(dst, active=int(fields[4]))
            g.add_edge(src, dst)

    # NOTE(review): the prefix is everything before the FIRST dot, so a path
    # such as "./reads.txt" yields "_hgraph.graphml" -- kept for compatibility.
    nx.write_graphml(g, filename.split('.')[0] + '_hgraph.graphml')
    print(nx.number_weakly_connected_components(g))
    print(nx.number_strongly_connected_components(g))
def main(self):
    """Print the Q2.4 connected-component report for the medium and large networks.

    Loads both data sets through ``dataLoader.DataLoader``, converts them with
    ``self.opener`` and prints, for each graph, the number of weakly and
    strongly connected components and the node/edge counts of the largest
    strongly connected component.
    """
    # Load the data
    data = dataLoader.DataLoader()
    medium = data.load_medium()
    large = data.load_large()

    # Send it to the opener
    med = self.opener(medium)
    lg = self.opener(large)

    def largest_scc(graph):
        # strongly_connected_component_subgraphs() was removed in networkx
        # 2.4; pick the biggest node set and take the induced subgraph.
        nodes = max(nx.strongly_connected_components(graph), key=len)
        return graph.subgraph(nodes)

    # Print the results
    print("Q2.4 How many weakly connected components and how many strongly connected components does this network have? How many nodes and links are in the largest strongly connected component of this graph?\n")
    print("Number of weakly connected components Medium: " + str(nx.number_weakly_connected_components(med)))
    print("Number of weakly connected components Large: " + str(nx.number_weakly_connected_components(lg)))
    print("\n")
    print("Number of strongly connected components Medium: " + str(nx.number_strongly_connected_components(med)))
    print("Number of strongly connected components Large: " + str(nx.number_strongly_connected_components(lg)))
    print("\n")

    # Compute each largest component once instead of once per printed line.
    med_core = largest_scc(med)
    lg_core = largest_scc(lg)
    print("How many nodes are in the largest strongly connected component?")
    print("Medium Network: " + str(nx.number_of_nodes(med_core)))
    print("Large Network: " + str(nx.number_of_nodes(lg_core)))
    print("\n")
    print("How many edges are in the largest strongly connected component?")
    print("Medium Network: " + str(nx.number_of_edges(med_core)))
    print("Large Network: " + str(nx.number_of_edges(lg_core)))
def save_network_statistics(
        g, output_path='./network-statistics/twitter-combined-statistics.txt'):
    """Compute summary statistics of ``g`` and write them to a text file.

    Parameters
    ----------
    g : networkx graph
        Graph to summarise; the component counts require a directed graph.
    output_path : str, optional
        Destination file.  Defaults to the path that used to be hard-coded.

    Notes
    -----
    One ``name: value`` line is written per statistic.  Statistics that
    networkx cannot compute for this graph are stored as ``None``.
    """
    stats = {}
    stats['num_weakly_connected_components'] = \
        nx.number_weakly_connected_components(g)
    stats['num_strongly_connected_components'] = \
        nx.number_strongly_connected_components(g)
    stats['num_nodes'] = nx.number_of_nodes(g)
    stats['num_edges'] = nx.number_of_edges(g)
    stats['density'] = nx.density(g)
    try:
        stats['avg_clustering_coef'] = nx.average_clustering(g)
    except Exception:  # best effort: e.g. not defined for directed graphs
        stats['avg_clustering_coef'] = None
    # dict(g.degree()) works for the dict returned by networkx 1.x and for
    # the DegreeView returned by networkx >= 2.
    degrees = dict(g.degree()).values()
    if stats['num_nodes']:
        stats['avg_degree'] = sum(degrees) / float(stats['num_nodes'])
    else:
        stats['avg_degree'] = None  # undefined for an empty graph
    stats['transitivity'] = nx.transitivity(g)
    try:
        stats['diameter'] = nx.diameter(g)
    except Exception:
        # unconnected --> infinite path length between connected components
        stats['diameter'] = None

    # Text mode: the values are str, which 'wb' rejects on Python 3.
    with open(output_path, 'w') as f:
        for stat_name, stat_value in stats.items():
            f.write(stat_name + ': ' + str(stat_value) + '\n')
def main():
    """Collect #cusec tweets, build the tweeter network and export it as GraphML."""
    # Create authenticated API (credentials are blank and must be filled in)
    credentials = dict(
        consumer_key="",
        consumer_secret="",
        access_token_key="",
        access_token_secret="",
    )
    api = twitter.Api(**credentials)
    hashtag = "#cusec"

    # Get all of the CUSEC data: result pages 1 through 7, 100 hits per page
    cusec = []
    for page in range(1, 8):
        cusec += api.GetSearch(hashtag, per_page=100, page=page)
    print("Number of searches returned: " + str(len(cusec)))

    # List of all CUSEC tweeters
    def screen_name_of(status):
        return status.user.screen_name

    cusec_tweeters = unique(map(screen_name_of, cusec))
    print("Number of people Tweeting " + hashtag + ": " + str(len(cusec_tweeters)))

    # Create network
    cusec_network, cusec_mc, cusec_subgraph = twitter_network(
        cusec_tweeters, api, user_type="cusec")

    # Export networks
    exports = (
        (cusec_network, "data/cusec_network.graphml"),
        (cusec_mc, "data/cusec_main_comp.graphml"),
        (cusec_subgraph, "data/cusec_users_subgraph.graphml"),
    )
    for network, target in exports:
        nx.write_graphml(network, target)

    print(nx.info(cusec_network))
    weak_count = nx.number_weakly_connected_components(cusec_network)
    print("Number of weakly connected components: " + str(weak_count))
def save_network_statistics(
        g, output_path='D:\\fingerprint-statistics-undirected.txt'):
    """Compute summary statistics of ``g`` and write them to a text file.

    Parameters
    ----------
    g : networkx graph
        Graph to summarise; the component counts require a directed graph.
    output_path : str, optional
        Destination file.  Defaults to the path that used to be hard-coded.

    Notes
    -----
    One ``name: value`` line is written per statistic.  Statistics that
    networkx cannot compute for this graph are stored as ``None``.
    """
    stats = {}
    stats['num_weakly_connected_components'] = \
        nx.number_weakly_connected_components(g)
    stats['num_strongly_connected_components'] = \
        nx.number_strongly_connected_components(g)
    stats['num_nodes'] = nx.number_of_nodes(g)
    stats['num_edges'] = nx.number_of_edges(g)
    stats['density'] = nx.density(g)
    try:
        stats['avg_clustering_coef'] = nx.average_clustering(g)
    except Exception:  # best effort: e.g. not defined for directed graphs
        stats['avg_clustering_coef'] = None
    degree_total = sum(degree for _, degree in g.degree())
    if stats['num_nodes']:
        stats['avg_degree'] = degree_total / float(stats['num_nodes'])
    else:
        stats['avg_degree'] = None  # undefined for an empty graph
    stats['transitivity'] = nx.transitivity(g)
    try:
        stats['diameter'] = nx.diameter(g)
    except Exception:
        # unconnected --> infinite path length between connected components
        stats['diameter'] = None

    with open(output_path, 'w') as f:
        for stat_name, stat_value in stats.items():
            f.write(stat_name + ': ' + str(stat_value) + '\n')
def analyze_connected_components(graph, data, filename, clusters=None):
    """Print a LaTeX-flavoured report of the weakly connected components.

    A component is reported only when it is selected by ``clusters`` (if
    given), has at least three nodes and is not a complete graph.  For each
    reported component the quality score, an ``\\includegraphics`` stanza
    and the cleaned first ad description of every node are printed.
    """
    print('Nodes:', graph.number_of_nodes())
    print("Components:", nx.number_weakly_connected_components(graph), '\n')
    print(max([len(members)
               for members in nx.weakly_connected_components(graph)]))

    figure_prefix = (r'\includegraphics[width=4in]{./plots/analyze_streaming_alg/'
                     + filename + '/')
    for index, members in enumerate(nx.weakly_connected_components(graph)):
        selected = clusters is None or index in clusters
        if not selected:
            continue
        # skip if cluster is small or is complete (for now)
        if len(members) < 3:
            continue
        undirected = nx.Graph(graph.subgraph(members))
        if nx.complement(undirected).number_of_edges() == 0:
            continue

        quality = cluster_quality(graph.subgraph(members), data)
        print('meta-cluster id:', index, 'quality:', quality)
        print(r'\begin{center}')
        print(figure_prefix + str(index) + '.png}')
        print(r'\end{center}')
        for member in members:
            ad_ids = graph.nodes[member]['contains']
            matching = data.data.loc[data.data['ad_id'].isin(ad_ids)]
            descriptions = matching['u_Description']
            print(member, remove_punctuation(descriptions.iloc[0]), '\n')
        print(r'\newpage ')
def get_graph_statistics(DG, category, kcore):
    """Print component/degree/weight statistics and plot two histograms.

    Parameters
    ----------
    DG : networkx.DiGraph
        Graph of one category.
    category : str
        Category name; spaces become dashes in the plot file names.
    kcore :
        Value appended (via ``str``) to the plot file names.
    """
    print('Graph statistics for category:', category)

    # number of connected components
    num_connected_components = nx.number_weakly_connected_components(DG)
    print('Number of weakly connected components:', num_connected_components)
    print()

    category_slug = "-".join(category.split(' '))

    # in-degree distribution
    print('In-degree distribution.')
    filename = category_slug + '-in-degree-hist-' + str(kcore) + '.eps'
    # dict(...) accepts both the dict of networkx 1.x and the
    # InDegreeView of networkx >= 2 (which has no .values()).
    indegree_list = list(dict(DG.in_degree()).values())
    if indegree_list:
        print('Max value of in-degree', max(indegree_list))
        print()
        histogram(indegree_list, 'plots/category_in_degree/' + filename,
                  'in-degree', '# nodes',
                  'Distribution of in-degree per category',
                  100, None, 1, 500, True)

    # edge weight distribution
    print('Edge weight distribution.')
    filename = category_slug + '-edge-weight-hist-' + str(kcore) + '.eps'
    # edges(data=..., default=...) replaces edges_iter(), removed in 2.0.
    weight_list = [e[2] for e in DG.edges(data='weight', default=1)]
    if weight_list:
        print('Max value of weight edges', max(weight_list))
        print()
        histogram(weight_list, 'plots/category_edge_weight/' + filename,
                  'edge weight', '# edges',
                  'Distribution of edge weights per category',
                  60, None, 1, 500, True)
def answer_four():
    """Return the node count of the first weakly connected component.

    The graph comes from ``answer_one()``.

    NOTE(review): this measures the FIRST component yielded by networkx,
    which is not necessarily the largest one -- confirm that this is what
    the question asks for.
    """
    G = answer_one()
    # The unused component count that was computed here has been dropped.
    first_component = list(nx.weakly_connected_components(G))[0]
    return len(first_component)
def component(self):
    """Write connectivity facts about ``self.graph`` to ``component.json``.

    For a graph flagged ``'directed'`` the result holds strong/weak/semi
    connectivity booleans, the component counts and one representative node
    per strongly and per weakly connected component.  The JSON file is
    written to ``self.DIR`` and its path is printed.
    """
    rslt = {}
    if self.directed == 'directed':
        graph = self.graph
        rslt['is_strongly_connected'] = nx.is_strongly_connected(graph)
        # One representative (first listed member) per strong component.
        rslt['strongly_connected'] = [
            list(members)[0]
            for members in nx.strongly_connected_components(graph)
        ]
        rslt['number_strongly_connected_components'] = \
            nx.number_strongly_connected_components(graph)
        rslt['is_semiconnected'] = nx.is_semiconnected(graph)
        # NOTE(review): the key is misspelled ("wealy"); it is left as is
        # because readers of the JSON file may depend on it.
        rslt['wealy_connected'] = [
            list(members)[0]
            for members in nx.weakly_connected_components(graph)
        ]
        rslt['is_weakly_connected'] = nx.is_weakly_connected(graph)
        rslt['number_weakly_connected_components'] = \
            nx.number_weakly_connected_components(graph)

    fname_component = self.DIR + '/component.json'
    with open(fname_component, "w") as out:
        json.dump(rslt, out, cls=SetEncoder, indent=2)
    print(fname_component)
def component_stats(G, verbose):
    """Print various relevant stats about the components of a graph.

    Parameters
    ----------
    G : networkx.DiGraph
    verbose : bool
        Set to True if you want explanations of stats

    Returns
    -------
    None
        Note: writes to the terminal.
    """
    explans = {
        'weakly-connected': "",
        'strongly-connected': "",
        'semiconnected': "",
    }
    if verbose:
        explans['weakly-connected'] = "(There is an undirected path between each pair of nodes in the directed graph)"
        explans['strongly-connected'] = "(There is a directed path between each pair of nodes in the directed graph)"

    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print("Is the graph weakly connected " + explans['weakly-connected'] + "? " + str(nx.is_weakly_connected(G)))
    print("Number of weakly connected components: " + str(nx.number_weakly_connected_components(G)))
    print("Is the graph semiconnected " + explans['semiconnected'] + "? " + str(nx.is_semiconnected(G)))
    print("Is the graph strongly connected " + explans['strongly-connected'] + "? " + str(nx.is_strongly_connected(G)))
def graph_structure_info(G, filename, df):
    """Append one row of structural statistics for ``G`` to ``df``.

    Parameters
    ----------
    G : networkx.DiGraph
        Graph to summarise; edge ``weight`` attributes are summed.
    filename :
        Value stored in the ``date`` column of the new row.
    df : pandas.DataFrame
        Accumulator; it is not modified in place.

    Returns
    -------
    pandas.DataFrame
        ``df`` with the new row appended.
    """
    # Only the in-degree is needed: the mean in-degree equals the mean
    # out-degree of a directed graph.
    in_degrees = [d for _, d in G.in_degree()]
    if in_degrees:
        # A plain float; the original stored a whole Series in the cell.
        ave_degree = sum(in_degrees) / float(len(in_degrees))
    else:
        ave_degree = float('nan')

    df1 = pd.DataFrame(
        data={
            'date': [filename],
            'nodes_num': [G.number_of_nodes()],
            'edges_num': [G.number_of_edges()],
            'density': [nx.density(G)],
            'number_weakly_connected_components':
                [nx.number_weakly_connected_components(G)],
            'number_strongly_connected_components':
                [nx.number_strongly_connected_components(G)],
            'weights_sum':
                [sum(nx.get_edge_attributes(G, 'weight').values())],
            'ave_degree': [ave_degree],
        })

    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
    df = pd.concat([df, df1])
    print("this year graph_basic_info is done")
    return df
def plot_num_components_directed(G_times, fname):
    """Plot strong/weak component counts per snapshot and mark outliers.

    The figure is saved to ``fname + 'components.pdf'``.  Outliers are found
    by ``find_rarity_windowed_outlier`` on the weak-component series and are
    returned sorted.
    """
    t = list(range(len(G_times)))
    num_strong = [nx.number_strongly_connected_components(G) for G in G_times]
    num_weak = [nx.number_weakly_connected_components(G) for G in G_times]

    plt.rcParams.update({'figure.autolayout': True})
    for tick_group in ('xtick', 'ytick'):
        plt.rc(tick_group, labelsize='x-small')

    fig = plt.figure(figsize=(4, 2))
    ax = fig.add_subplot(1, 1, 1)
    line_style = dict(ls='solid', linewidth=0.5, markersize=1)
    ax.plot(t, num_strong, marker="P", color='#ffa600', label="strongly",
            **line_style)
    ax.plot(t, num_weak, marker="h", color='#003f5c', label="weakly",
            **line_style)
    ax.set_xlabel('time', fontsize=8)

    # Outliers are detected on the weakly connected series.
    outliers = find_rarity_windowed_outlier(num_weak)
    outliers.sort()
    for position in outliers:
        plt.axvline(x=position, color='k', linestyle=":", linewidth=0.5)

    ax.set_ylabel('number of connected components', fontsize=8)
    plt.title("number of connected components over time", fontsize='x-small')
    plt.legend(fontsize=5)
    plt.savefig(fname + 'components.pdf', pad_inches=0)
    return outliers
def info_list(graph):
    """Return useful information about the graph as a list of tuples.

    The average degree entry is omitted (and a log message emitted) when
    the graph has no nodes; the warnings entry is added only when
    ``graph.warnings`` is non-empty.

    :param pybel.BELGraph graph: A BEL graph
    :rtype: list
    """
    number_nodes = graph.number_of_nodes()
    result = [
        ('Nodes', number_nodes),
        ('Edges', graph.number_of_edges()),
        ('Citations', count_unique_citations(graph)),
        ('Authors', count_unique_authors(graph)),
        ('Network density', nx.density(graph)),
        ('Components', nx.number_weakly_connected_components(graph)),
    ]

    # dict(...) accepts both the dict returned by networkx 1.x and the
    # InDegreeView returned by networkx >= 2 (which has no .values()).
    in_degree_total = sum(dict(graph.in_degree()).values())
    try:
        result.append(('Average degree', in_degree_total / float(number_nodes)))
    except ZeroDivisionError:
        log.info('Graph has no nodes')

    if graph.warnings:
        result.append(('Compilation warnings', len(graph.warnings)))

    return result
def iterative_graph(monostrings, min_k, max_k, outdir, min_mult=5, step=1,
                    starting_graph=None, verbose=True):
    """Build de Bruijn graphs for k = min_k .. max_k, feeding contigs forward.

    For every k the frequent k-mers of the current input strings (the reads
    plus ``min_mult`` copies of each contig from the previous round) are
    collected, merged with the paths through complex nodes of the previous
    graph, and turned into a collapsed ``DeBruijnGraph`` that is also
    written to ``outdir`` as ``db_k<k>.dot``.

    Returns
    -------
    tuple
        ``(all_contigs, dbs, all_frequent_kmers, all_frequent_kmers_read_pos)``,
        each a dict keyed by k.
    """
    smart_makedirs(outdir)
    strings = {name: ''.join(mono.string) for name, mono in monostrings.items()}

    def with_contigs(contigs, k):
        # Reads plus min_mult copies of every contig, tagged with k.
        seeded = strings.copy()
        for i in range(len(contigs)):
            for j in range(min_mult):
                seeded[f'contig_k{k}_i{i}_j{j}'] = contigs[i]
        return seeded

    dbs, all_contigs = {}, {}
    all_frequent_kmers, all_frequent_kmers_read_pos = {}, {}

    if starting_graph is None:
        input_strings = strings.copy()
        complex_kp1mers = {}
    else:
        contigs, contig_paths = starting_graph.get_contigs()
        input_strings = with_contigs(contigs, min_k)
        complex_kp1mers = get_paths_thru_complex_nodes(starting_graph, strings)

    for k in range(min_k, max_k + 1, step):
        frequent_kmers, frequent_kmers_read_pos = get_frequent_kmers(
            input_strings, k=k, min_mult=min_mult)
        frequent_kmers.update(complex_kp1mers)
        if verbose:
            print(f'\nk={k}')
            print(f'#frequent kmers = {len(frequent_kmers)}')
        all_frequent_kmers[k] = frequent_kmers
        all_frequent_kmers_read_pos[k] = frequent_kmers_read_pos

        db = DeBruijnGraph(k=k)
        db.add_kmers(frequent_kmers, coverage=frequent_kmers)
        db.collapse_nonbranching_paths()
        if verbose and nx.number_weakly_connected_components(db.graph) > 1:
            # Report a fragmented graph together with its component sizes.
            print(f'#cc = {nx.number_weakly_connected_components(db.graph)}')
            for cc in nx.weakly_connected_components(db.graph):
                print(len(cc))
        dbs[k] = db

        dot_file = os.path.join(outdir, f'db_k{k}.dot')
        nx.drawing.nx_pydot.write_dot(db.graph, dot_file)

        contigs, contig_paths = db.get_contigs()
        all_contigs[k] = contigs

        # Inputs for the next round.
        input_strings = with_contigs(contigs, k)
        complex_kp1mers = get_paths_thru_complex_nodes(db, strings)

    return all_contigs, dbs, all_frequent_kmers, all_frequent_kmers_read_pos
def DNetworkSummary(D, qtr, filename=None):
    """Print (and optionally save) a quick table of DIRECTED network statistics.

    Parameters
    ----------
    D : networkx.DiGraph
        Graph whose edges may carry ``calls``, ``min``, ``sms`` and ``mms``
        weights.
    qtr :
        Quarter label shown in the first row of the table.
    filename : str, optional
        When given, the table is also written there as CSV without index.
    """
    ts = " "  # just so the output file is a little more readable

    # (description, value) pairs in display order
    rows = [
        ("Directed Network Statistics -- Quarter ", qtr),
        (ts + "Number of nodes: ", D.number_of_nodes()),
        (ts + "Number of edges (unweighted): ", D.size()),
        (ts + ts + "Number of edges (weighted by calls): ",
         D.size(weight='calls')),
        (ts + ts + "Number of edges (weighted by minutes): ",
         D.size(weight='min')),
        (ts + ts + "Number of edges (weighted by SMS): ",
         D.size(weight='sms')),
        (ts + ts + "Number of edges (weighted by MMS): ",
         D.size(weight='mms')),
        (ts + "Number of Strongly Connected Components (SCC): ",
         nx.number_strongly_connected_components(D)),
        (ts + ts + "Relative size of largest SCC: ", relativeLSCCsize(D)),
        (ts + "Number of Weakly Connected Components (WCC): ",
         nx.number_weakly_connected_components(D)),
        (ts + ts + "Relative size of largest WCC: ", relativeLWCCsize(D)),
    ]
    name = [description for description, _ in rows]
    result = [value for _, value in rows]

    outputresults = pd.DataFrame(data={'Description': name, 'Result': result})
    if filename is not None:
        outputresults.to_csv(filename, index=False)
    # print(...) with one argument behaves the same on Python 2 and 3.
    print(outputresults)
def draw_graph(nodes, edges, graphs_dir, default_lang='all'):
    """Build the language link graph, print its metrics and save drawings.

    Parameters
    ----------
    nodes : iterable
        Node names.
    edges : dict
        Maps edge tuples ``(source, target)`` to a weight; a weight of 0
        creates an edge without ``weight``/``label`` attributes.
    graphs_dir : str
        Directory receiving ``<lang>_links.dot`` and
        ``python_<lang>_graph.png``.
    default_lang : str, optional
        Label printed with the statistics and used in the file names.
    """
    lang_graph = nx.MultiDiGraph()
    lang_graph.add_nodes_from(nodes)
    for edge, weight in edges.items():
        if weight == 0:
            lang_graph.add_edge(edge[0], edge[1])
        else:
            lang_graph.add_edge(edge[0], edge[1],
                                weight=float(weight), label=str(weight))

    # print graph info in stdout
    print('-----------------\n\n')
    print(default_lang)
    print(nx.info(lang_graph))

    # degree centrality
    try:
        # When ties are associated to some positive aspects such as
        # friendship or collaboration, indegree is often interpreted as a
        # form of popularity, and outdegree as gregariousness.
        DC = nx.degree_centrality(lang_graph)
        max_dc = max(DC.values())
        max_dc_list = [item for item in DC.items() if item[1] == max_dc]
    except ZeroDivisionError:
        max_dc_list = []
    # https://ru.wikipedia.org/wiki/%D0%9A%D0%BE%D0%BC%D0%BF%D0%BB%D0%B5%D0%BA%D1%81%D0%BD%D1%8B%D0%B5_%D1%81%D0%B5%D1%82%D0%B8
    print('maxdc', str(max_dc_list), sep=': ')

    # assortativity coef
    AC = nx.degree_assortativity_coefficient(lang_graph)
    print('AC', str(AC), sep=': ')

    # connectivity
    print("Слабо-связный граф: ", nx.is_weakly_connected(lang_graph))
    print("количество слабосвязанных компонент: ",
          nx.number_weakly_connected_components(lang_graph))
    print("Сильно-связный граф: ", nx.is_strongly_connected(lang_graph))
    print("количество сильносвязанных компонент: ",
          nx.number_strongly_connected_components(lang_graph))
    # The label used to contain a raw line-break character inside the string
    # literal; it is now a plain space.
    print("рекурсивные? компоненты: ",
          nx.number_attracting_components(lang_graph))
    print("число вершинной связности: ", nx.node_connectivity(lang_graph))
    print("число рёберной связности: ", nx.edge_connectivity(lang_graph))

    # other info
    print("average degree connectivity: ",
          nx.average_degree_connectivity(lang_graph))
    print("average neighbor degree: ",
          sorted(nx.average_neighbor_degree(lang_graph).items(),
                 key=itemgetter(1), reverse=True))
    # best for small graphs, and our graphs are pretty small
    print("pagerank: ",
          sorted(nx.pagerank_numpy(lang_graph).items(),
                 key=itemgetter(1), reverse=True))

    plt.figure(figsize=(16.0, 9.0), dpi=80)
    plt.axis('off')
    pos = graphviz_layout(lang_graph)
    nx.draw_networkx_edges(lang_graph, pos, alpha=0.5, arrows=True)
    nx.draw_networkx(lang_graph, pos, node_size=1000, font_size=12,
                     with_labels=True, node_color='green')
    nx.draw_networkx_edge_labels(lang_graph, pos, edges)

    # saving file to draw it with dot-graphviz
    # changing overall graph view, default is top-bottom
    lang_graph.graph['graph'] = {'rankdir': 'LR'}

    # marking with blue nodes with maximum degree centrality
    for max_dc_node in max_dc_list:
        # add_node(name, **attrs) updates an existing node on every networkx
        # release; ``lang_graph.node[...]`` was removed in networkx 2.4.
        lang_graph.add_node(max_dc_node[0], fontcolor='blue')

    write_dot(lang_graph, os.path.join(graphs_dir, default_lang + '_links.dot'))

    plt.savefig(os.path.join(graphs_dir, 'python_' + default_lang + '_graph.png'),
                dpi=100)
    plt.close()
def _sanity_check(G):
    r"""
    Verify that the input graph consists of exactly one connected component.

    Directed graphs are judged by weak connectivity, undirected graphs by
    ordinary connectivity.

    Parameters
    ----------
    G : graph
        A NetworkX graph

    Raises
    ------
    ValueError
        If the number of (weakly) connected components differs from one.
    """
    # Pick the counting routine that matches the graph's directedness
    count_components = (nx.number_weakly_connected_components
                        if G.is_directed()
                        else nx.number_connected_components)
    num_ccs = count_components(G)

    if num_ccs == 1:
        return

    # Raise an error when the component count is anything other than one
    raise ValueError(
        "Input graph should contain one (weakly) connected component. "
        "This graph contains: " + str(num_ccs))
def fill_out_report(*, network: Network, report: Report, graph: Optional[BELGraph] = None) -> None:
    """Populate ``report`` with summary statistics of ``network``.

    :param network: The network the report belongs to
    :param report: The report object that receives the statistics
    :param graph: The BEL graph of the network; obtained from
        ``network.as_bel()`` when omitted
    """
    graph = network.as_bel() if graph is None else graph

    number_nodes = graph.number_of_nodes()
    # Edges per node; an empty graph has an average degree of 0.0.
    if number_nodes:
        average_degree = graph.number_of_edges() / number_nodes
    else:
        average_degree = 0.0

    report.network = network
    report.number_nodes = number_nodes
    report.number_edges = graph.number_of_edges()
    report.number_warnings = graph.number_of_warnings()
    report.number_citations = graph.number_of_citations()
    report.number_authors = graph.number_of_authors()
    report.number_components = nx.number_weakly_connected_components(graph)
    report.network_density = nx.density(graph)
    report.average_degree = average_degree

    report.dump_calculations(graph)
    report.completed = True
def connected_components(network):
    """Print component counts and the edge count of the largest SCC.

    Parameters
    ----------
    network : networkx.DiGraph
        Directed graph to inspect.
    """
    n_scc = nx.number_strongly_connected_components(network)
    n_wcc = nx.number_weakly_connected_components(network)
    print("# of strongly connected components: " + str(n_scc))
    print("# of weakly connected components: " + str(n_wcc))

    # strongly_connected_component_subgraphs() was removed in networkx 2.4;
    # select the largest node set and take the induced subgraph instead.
    largest_nodes = max(nx.strongly_connected_components(network), key=len)
    print(network.subgraph(largest_nodes).number_of_edges())
def print_info(G):
    """Log the size and component counts of ``G`` at DEBUG level."""
    logging.debug("begin transitiv reduction")
    measurements = (
        ("nb edge : %s", G.number_of_edges),
        ("nb node : %s", G.number_of_nodes),
        ("nb strong components : %s",
         lambda: nx.number_strongly_connected_components(G)),
        ("nb weak components : %s",
         lambda: nx.number_weakly_connected_components(G)),
    )
    # Each value is computed right before its message is emitted.
    for template, measure in measurements:
        logging.debug(template, measure())
def components(graph):
    """Print the component counts of ``graph`` and save its condensation.

    The condensation is written as an edge list to
    ``Datasets/condenced_graph.edgelist``.
    """
    strong_count = nx.number_strongly_connected_components(graph)
    print("Number of strongly connected components", strong_count)
    weak_count = nx.number_weakly_connected_components(graph)
    print("Number of weakly connected components", weak_count)

    nx.write_edgelist(nx.condensation(graph),
                      "Datasets/condenced_graph.edgelist")
def dbg_info(dag, optional_callable=None):
    """Print a debug summary of ``dag`` between two separator lines.

    ``optional_callable``, when truthy, is invoked with no arguments right
    after the opening separator.
    """
    separator = '-------------------------------------------------------------------'
    print(separator)
    if optional_callable:
        optional_callable()

    counts = (dag.number_of_nodes(), dag.number_of_edges())
    print('Nodes: %d, edges: %d' % counts)
    print('Is DAG?', nx.is_directed_acyclic_graph(dag))
    print('Weakly connected components:',
          nx.number_weakly_connected_components(dag))
    dbg_pprint_source_sink_types(dag)
    print(separator)
def pattern_remove_incomplete_region_or_spatial_path(
    perception_graph: PerceptionGraphPattern
) -> PerceptionGraphPattern:
    """
    Return a copy of the pattern in which every region and spatial-path node
    has a reference object.

    Region/path nodes without an outgoing reference-object relation are
    removed; if that disconnects the graph, only the weakly connected
    component with the most nodes is kept.
    """
    graph = perception_graph.copy_as_digraph()
    reference_labels = [
        REFERENCE_OBJECT_LABEL,
        REFERENCE_OBJECT_DESTINATION_LABEL,
        REFERENCE_OBJECT_SOURCE_LABEL,
    ]

    def has_reference_edge(node: NodePredicate) -> bool:
        # True as soon as one outgoing edge is a reference-object relation.
        for successor in graph.successors(node):
            predicate = graph.edges[node, successor]["predicate"]
            if (
                isinstance(predicate, RelationTypeIsPredicate)
                and predicate.relation_type in reference_labels
            ):
                return True
        return False

    region_and_path_nodes: ImmutableSet[NodePredicate] = immutableset(
        node
        for node in graph.nodes
        if isinstance(node, (IsPathPredicate, RegionPredicate))
    )
    nodes_without_reference: List[NodePredicate] = [
        node for node in region_and_path_nodes if not has_reference_edge(node)
    ]

    logging.info(
        f"Removing incomplete regions and paths. "
        f"Removing nodes: {nodes_without_reference}"
    )
    graph.remove_nodes_from(nodes_without_reference)

    if number_weakly_connected_components(graph) <= 1:
        return PerceptionGraphPattern(graph, dynamic=perception_graph.dynamic)

    # Brute-force cleanup: keep the biggest component, drop all others.
    components = sorted(
        (subgraph(graph, comp) for comp in weakly_connected_components(graph)),
        key=lambda piece: len(piece.nodes),
        reverse=True,
    )
    computed_graph = subgraph(graph, components[0].nodes)
    removed_nodes: List[NodePredicate] = [
        node for dropped in components[1:] for node in dropped.nodes
    ]
    logging.info(f"Cleanup disconnected elements. Removing: {removed_nodes}")

    return PerceptionGraphPattern(computed_graph, dynamic=perception_graph.dynamic)
def write_components_info(G, report_file):
    """Write the component statistics section of ``G`` to ``report_file``.

    ``report_file`` only needs a ``write`` method.  Each statistic is
    computed immediately before its line is written.
    """
    report_file.write("===COMPONENTS_INFO===\n")
    entries = (
        ("Number of strongly connected components: {}\n",
         nx.number_strongly_connected_components),
        ("Number of weakly connected components: {}\n",
         nx.number_weakly_connected_components),
        ("Number of attractive components: {}\n",
         nx.number_attracting_components),
        ("Is semiconnected: {}\n", nx.is_semiconnected),
    )
    for template, measure in entries:
        report_file.write(template.format(measure(G)))
def get_graph_stats(df):
    """Return ``(nodes, edges, weak components, strong components)``.

    The directed graph is built from the ``src`` -> ``trg`` column pairs
    of ``df``.
    """
    pairs = list(zip(df["src"], df["trg"]))
    graph = nx.DiGraph()
    graph.add_edges_from(pairs)

    node_count = graph.number_of_nodes()
    edge_count = graph.number_of_edges()
    weak_count = nx.number_weakly_connected_components(graph)
    strong_count = nx.number_strongly_connected_components(graph)
    return (node_count, edge_count, weak_count, strong_count)
def build_graph(self):
    """Build the weighted loser -> winner graph from the playoff game files.

    ``basketball_start_year.csv`` (one header line, then three space
    separated fields per line) fills ``self.year_map``.  Every game in
    ``basketball_playoff_games.txt`` assigns indices to both coaches; a game
    contributes weight ``log(1 + 0.1 * score_difference)`` only when both
    coaches have a start year strictly between the module-level
    ``start_year`` and ``end_year`` and the difference is positive.
    Opposite results cancel: an existing winner -> loser edge is reduced,
    removed or flipped instead of adding a second edge.

    Prints the node count, the edge count and the number of weakly
    connected components.
    """
    self.coach_map = {}
    self.year_map = {}
    self.reverse_map = {}
    self.G = nx.DiGraph()

    with open("basketball_start_year.csv", "r") as yf:
        yf.readline()  # skip the header line
        for line in yf:
            _, k, v = line.replace("\n", "").replace("\"", "").split(" ")
            self.year_map[k] = int(v)

    def index_of(coach):
        # Return the coach's index, registering the coach on first sight.
        if coach not in self.coach_map:
            new_index = len(self.coach_map)
            self.coach_map[coach] = new_index
            self.reverse_map[new_index] = coach
        return self.coach_map[coach]

    def in_window(coach):
        return start_year < self.year_map[coach] < end_year

    # The games file used to be opened without ever being closed.
    with open("basketball_playoff_games.txt", 'r') as f:
        for line in f:
            winner, winning_score, loser, losing_score = parse_tuple(line)
            winner_index = index_of(winner)
            loser_index = index_of(loser)
            new_diff = winning_score - losing_score

            if winner not in self.year_map or loser not in self.year_map:
                continue
            if not in_window(winner) or not in_window(loser):
                continue
            if not new_diff > 0:
                continue

            add_weight = math.log(1 + 0.1 * new_diff)
            if self.G.get_edge_data(winner_index, loser_index):
                # An opposite edge exists: net the two results.
                rev_weight = self.G[winner_index][loser_index]['weight']
                if rev_weight > add_weight:
                    self.G[winner_index][loser_index]['weight'] -= add_weight
                elif rev_weight == add_weight:
                    self.G.remove_edge(winner_index, loser_index)
                else:
                    self.G.remove_edge(winner_index, loser_index)
                    self.G.add_edge(loser_index, winner_index,
                                    weight=(add_weight - rev_weight))
            elif self.G.get_edge_data(loser_index, winner_index):
                self.G[loser_index][winner_index]['weight'] += add_weight
            else:
                self.G.add_edge(loser_index, winner_index, weight=add_weight)

    print(len(self.G.nodes()))
    print(len(self.G.edges()))
    print(nx.number_weakly_connected_components(self.G))
def load(self) -> None:
    """
    Read the adjacency matrix stored in ``self.networkfile`` (a .npy file)
    and build the directed graph from it.

    Sets ``self.graph`` (the raw matrix) and ``self.nxgraph``; additionally
    sets ``self.subgraphs`` to True when the graph has more than one weakly
    connected component.
    """
    self.graph = np.load(self.networkfile)
    self.nxgraph = nx.DiGraph(self.graph)

    component_count = nx.number_weakly_connected_components(self.nxgraph)
    if component_count > 1:
        self.subgraphs = True
def read_graph(filename):
    """Build a directed graph from an edge file and save it as GraphML.

    Each non-empty line of ``filename`` is whitespace-split; fields 0 and 1
    are identifiers, fields 2 and 3 are the suffixes appended to them
    (``<id>_<suffix>``) to form node names, and field 4 is the integer
    ``active`` flag of the second node (the first node always gets 1).

    The graph is written to ``<prefix>_hgraph.graphml`` and the numbers of
    weakly and strongly connected components are printed.
    """
    g = nx.DiGraph()
    with open(filename) as handle:
        for line in handle:
            fields = line.split()
            if not fields:
                # Blank lines used to raise IndexError; skip them instead.
                continue
            src = fields[0] + "_" + fields[2]
            dst = fields[1] + "_" + fields[3]
            # add_node(name, **attrs) creates or updates the node and works on
            # every networkx release, unlike ``g.node[...]`` (removed in 2.4).
            g.add_node(src, active=1)
            g.add_node(dst, active=int(fields[4]))
            g.add_edge(src, dst)

    # NOTE(review): the prefix is everything before the FIRST dot, so a path
    # such as "./reads.txt" yields "_hgraph.graphml" -- kept for compatibility.
    nx.write_graphml(g, filename.split('.')[0] + '_hgraph.graphml')
    print(nx.number_weakly_connected_components(g))
    print(nx.number_strongly_connected_components(g))
def validate(self):
    """Return the list of validation errors of the graph and its nodes.

    A graph with more than one weakly connected component contributes
    'no subgraphs are allowed'; the errors reported by each node's own
    ``validate()`` follow.
    """
    is_fragmented = nx.number_weakly_connected_components(self.dg) > 1
    errors = ['no subgraphs are allowed'] if is_fragmented else []
    for node in self.nodes.values():
        errors += node.validate()
    return errors
def output_conectivity_info(graph, path):
    """Write connectivity information about the graph to a file.

    graph : (networkx.Graph)
    path: (String) contains the path to the output file
    """
    entries = (
        ('Is weakly connected: %s\n', nx.is_weakly_connected),
        ('Number of weakly connected components: %d\n',
         nx.number_weakly_connected_components),
        ('Is strongly connected: %s\n', nx.is_strongly_connected),
        ('Number of strongly connected components: %d',
         nx.number_strongly_connected_components),
    )
    with open(path, 'w') as out:
        out.write('***Conectivity***\n')
        # Each value is computed right before its line is written.
        for template, measure in entries:
            out.write(template % measure(graph))
def _connected_components(weighted_projection):
    """Return the number of (weakly) connected components of the projection.

    ``networkx.DiGraph`` inputs are handled by networkx.  Any other input is
    presumed to be a graph-tool graph (TODO confirm against callers): it is
    switched to undirected, labelled with ``label_components`` and switched
    back to directed.
    """
    if isinstance(weighted_projection, nx.DiGraph):
        return nx.number_weakly_connected_components(weighted_projection)

    from graph_tool.topology import label_components

    G = weighted_projection
    G.set_directed(False)
    try:
        _, comps = label_components(G)
    finally:
        # Restore directedness even when labelling fails, so the caller's
        # graph is never left undirected.
        G.set_directed(True)
    return len(comps)
def dbg_info(dag, optional_callable=None):
    """Dump node/edge counts, DAG-ness and weak component count of ``dag``.

    The report is framed by two rule lines; ``optional_callable`` (if
    truthy) runs with no arguments directly after the first rule.
    """
    rule = '-------------------------------------------------------------------'

    print(rule)
    if optional_callable:
        optional_callable()

    node_count = dag.number_of_nodes()
    edge_count = dag.number_of_edges()
    print('Nodes: %d, edges: %d' % (node_count, edge_count))

    acyclic = nx.is_directed_acyclic_graph(dag)
    print('Is DAG?', acyclic)

    weak_components = nx.number_weakly_connected_components(dag)
    print('Weakly connected components:', weak_components)

    dbg_pprint_source_sink_types(dag)
    print(rule)
def print_stats(graph):
    """Print source/sink counts, component count and chain lengths of the
    mutation graph."""
    # Sources have no incoming edges, sinks have no outgoing edges.
    sources = []
    for node, in_degree in graph.in_degree():
        if in_degree == 0:
            sources.append(node)
    sinks = []
    for node, out_degree in graph.out_degree():
        if out_degree == 0:
            sinks.append(node)

    min_len, max_len = get_path_stats(graph, sources, sinks)
    num_connected_components = nx.number_weakly_connected_components(graph)

    report = (
        ('num. source nodes: %d', len(sources)),
        ('num. sink nodes: %d', len(sinks)),
        ('num. connected components: %d', num_connected_components),
        ('shortest mutation chain: %d', min_len),
        ('longest mutation chain: %d', max_len),
    )
    for template, value in report:
        print(template % value)
def _describe_list(self) -> List[Tuple[str, float]]:
    """Return summary facts about the graph as ``(label, value)`` tuples.

    The density is reported as a string in scientific notation with two
    decimals.
    """
    summary = []
    summary.append(('Number of Nodes', self.number_of_nodes()))
    summary.append(('Number of Edges', self.number_of_edges()))
    summary.append(('Number of Citations', self.number_of_citations()))
    summary.append(('Number of Authors', self.number_of_authors()))
    summary.append(('Network Density', '{:.2E}'.format(nx.density(self))))
    summary.append(
        ('Number of Components', nx.number_weakly_connected_components(self)))
    summary.append(('Number of Warnings', self.number_of_warnings()))
    return summary
def _directed_path_length(graph, source, target):
    """Return the shortest directed path length, or -1 when no path exists."""
    # One search instead of has_path() followed by shortest_path_length().
    try:
        return nx.shortest_path_length(graph, source, target)
    except nx.NetworkXNoPath:
        return -1


def generate_oriented(graph, X):
    """Compute directed-graph features for every (target, source) pair in X.

    Parameters
    ----------
    graph : networkx.DiGraph
        Graph the features are measured on.
    X : numpy.ndarray
        One pair per row: column 0 is the target node, column 1 the source.

    Returns
    -------
    tuple of numpy.ndarray
        ``(target_feats, source_feats, edge_feats)`` with 5, 5 and 11
        columns.  The vertex features come from ``all_oriented_vertex``;
        the edge features are common in/out neighbours, transitive links,
        the two friends measures, component counts of the neighbourhood
        subgraphs and both directed path lengths (-1 when unreachable).

    Progress and elapsed time are printed every 10000 rows.
    """
    n_rows = X.shape[0]
    target_feats = np.empty((n_rows, 5))
    source_feats = np.empty((n_rows, 5))
    edge_feats = np.empty((n_rows, 11))

    t1 = time()
    for i, x in enumerate(X):
        t = x[0]
        s = x[1]
        in_d_t, out_d_t, scc_t, wcc_t, sccp_t, n_in_t, n_out_t, n_t, np_t = \
            all_oriented_vertex(graph, t)
        in_d_s, out_d_s, scc_s, wcc_s, sccp_s, n_in_s, n_out_s, n_s, np_s = \
            all_oriented_vertex(graph, s)

        com_in = len(set(n_in_t).intersection(n_in_s))
        com_on = len(set(n_out_t).intersection(n_out_s))
        trans_ts = len(set(n_out_t).intersection(n_in_s))
        trans_st = len(set(n_out_s).intersection(n_in_t))

        # Count edges running between the two neighbourhoods, per direction.
        friends_measure_st = 0
        friends_measure_ts = 0
        for ns in n_s:
            for nt in n_t:
                if graph.has_edge(ns, nt):
                    friends_measure_st += 1
                if graph.has_edge(nt, ns):
                    friends_measure_ts += 1

        sub_nh = graph.subgraph(list(set(n_t).union(n_s)))
        sub_nh_plus = graph.subgraph(list(set(np_t).union(np_s)))
        scc = nx.number_strongly_connected_components(sub_nh)
        wcc = nx.number_weakly_connected_components(sub_nh)
        scc_plus = nx.number_strongly_connected_components(sub_nh_plus)

        len_path_st = _directed_path_length(graph, s, t)
        len_path_ts = _directed_path_length(graph, t, s)

        target_feats[i] = [in_d_t, out_d_t, scc_t, wcc_t, sccp_t]
        source_feats[i] = [in_d_s, out_d_s, scc_s, wcc_s, sccp_s]
        edge_feats[i] = [
            com_in, com_on, trans_ts, trans_st, friends_measure_st,
            friends_measure_ts, scc, wcc, scc_plus, len_path_st, len_path_ts
        ]

        if i % 10000 == 0:
            print(i, n_rows)
            t2 = time()
            print(t2 - t1)
            t1 = t2

    return target_feats, source_feats, edge_feats
def update_model(self, edge: Tuple[str, str], manipulation: int, allow_disconnecting: bool = True, allow_cycles: bool = True) -> bool:
    """
    Apply one edge manipulation to the causal model and report success.

    Removing or reversing a missing edge, and adding an existing edge, leave
    the model untouched and count as failures. A manipulation that violates
    'allow_disconnecting' or 'allow_cycles' is rolled back.

    :param edge: The edge to be manipulated. e.g. (X0, X1)
    :param manipulation: 0 = remove edge, 1 = add edge, 2 = reverse edge
    :param allow_disconnecting: If true, manipulations which disconnect the causal graph can be executed.
    :param allow_cycles: If true, manipulations which result in a cycle can be executed.
    :return: True if the manipulation was successful. False if it wasn't or it was illegal according
    to 'allow_disconnecting' or 'allow_cycles'.
    """
    model = self.causal_model
    tail, head = edge[0], edge[1]
    edge_present = model.has_edge(tail, head)

    if manipulation == 0:
        # remove edge if exists
        if not edge_present:
            return False
        model.remove_edge(tail, head)
        # undo when the removal split the graph and that is not allowed
        if not allow_disconnecting and nx.number_weakly_connected_components(model) > 1:
            model.add_edge(tail, head)
            return False

    elif manipulation == 1:
        # only add edge if not already there
        if edge_present:
            return False
        model.add_edge(tail, head)
        became_cyclic = not nx.is_directed_acyclic_graph(model)
        if became_cyclic and not allow_cycles:
            model.remove_edge(tail, head)
            return False

    elif manipulation == 2:
        # reverse edge
        if not edge_present:
            return False
        model.remove_edge(tail, head)
        model.add_edge(head, tail)
        became_cyclic = not nx.is_directed_acyclic_graph(model)
        if became_cyclic and not allow_cycles:
            # restore the original orientation
            model.remove_edge(head, tail)
            model.add_edge(tail, head)
            return False

    return True
def track_df(self, df, identifiers):
    """
    Tracks the objects in df.

    Works on a copy of ``df`` kept as ``self.df``. This method itself adds the
    columns 'unique_id' and 'track_id' to that copy; any further columns
    (e.g. a segment id) would come from the segment-linking helpers it calls
    -- NOTE(review): confirm against those helpers.

    Besides ``self.df`` the call sets ``self.G2`` (directed graph built from
    ``self.adjacency_matrix``), ``self.number_of_tracks``, ``self.tracks``
    (sorted member indices of every weakly connected component, largest
    component first) and ``self.tracks_by_label`` (the label-column values of
    the same members, in the same order).

    Parameters
    ----------
    df : pd.DataFrame
        a pandas dataframe containing the x, y, and t coordinates of objects
    identifiers : list
        list of column names for coordinates in df and label column
        (e.g. ['x_coord', 'y_coord', 'timepoint', 'labels'])
    """
    self.df = df.copy()
    self.identifiers = identifiers
    self.number_of_objects = len(df)
    self.number_of_timepoints = np.max(np.unique(df[identifiers[2]]))
    self.adjacency_matrix = np.zeros(
        [self.number_of_objects, self.number_of_objects])

    # add unique identifiers to df
    self.df['unique_id'] = list(range(0, self.number_of_objects))

    # link timepoints to get segments
    self.__get_track_segments()

    # try to link the segments among themselves
    print('linking track segments across timepoints')
    self.__close_gaps()

    # get the final tracks
    self.G2 = nx.DiGraph(self.adjacency_matrix)
    self.number_of_tracks = nx.number_weakly_connected_components(self.G2)
    self.tracks = [
        sorted(c) for c in sorted(
            nx.weakly_connected_components(self.G2), key=len, reverse=True)
    ]
    # Reuse the tracks computed above instead of recomputing and re-sorting
    # the components a second time; the member order is the same.
    label_column = self.df[self.identifiers[3]]
    self.tracks_by_label = [list(label_column.iloc[track]) for track in self.tracks]

    # add column for track ids
    # Components partition the nodes, so one pass over the tracks yields the
    # track of every object (replaces an objects x tracks list-membership scan).
    track_of_object = {}
    for track_id, track in enumerate(self.tracks):
        for obj in track:
            track_of_object[obj] = track_id
    self.df['track_id'] = [
        track_of_object[obj] for obj in range(0, self.number_of_objects)
    ]
def calculate_complexity(self):
    '''
    Cyclomatic complexity of self.graph after McCabe (1976), "A Complexity Measure".

        v(G) = e - n + p

    with
        e   number of edges
        n   number of nodes
        p   number of (weakly) connected components

    The e - n + p form presumes a strongly connected graph: every exit node
    is taken to loop back to the entry node through re-execution of the program.

    Note: v(G) equals the size of the basis set of G, i.e. the maximum
          number of linearly independent paths through it.
    '''
    graph = self.graph
    edge_count = graph.number_of_edges()
    node_count = graph.number_of_nodes()
    component_count = nx.number_weakly_connected_components(graph)
    return edge_count - node_count + component_count
def all_graphs(num_nodes, directed_motifs):
    """
    Collect single-component graphs on ``num_nodes`` nodes.

    Starting from the complete graph, edges are stripped one at a time; every
    intermediate graph that still forms one (weakly) connected component is
    kept, and the untouched complete graph is appended last. For directed
    motifs the undirected results (obtained by a recursive call) are added as
    bidirectional graphs when their edge list is not already present.

    Mainly a helper for get_subgraphs, but usable on its own.

    Parameters
    ----------
    num_nodes : The number of nodes in the complete graph
    directed_motifs : A boolean designating whether the motifs should be directed.

    Returns
    ----------
    graphs : A list of the single component graphs that were collected
    """
    shrinking = nx.complete_graph(num_nodes)
    untouched = copy.deepcopy(shrinking)
    if directed_motifs:
        shrinking = shrinking.to_directed()
        untouched = untouched.to_directed()
        undirected_results = all_graphs(num_nodes, False)

    collected = []
    pending = shrinking.edges()
    while shrinking.number_of_edges() > 0:
        # Drop the next edge and keep a snapshot if the graph is still in one piece.
        dropped = pending.pop()
        shrinking.remove_edge(dropped[0], dropped[1])
        if directed_motifs:
            pieces = nx.number_weakly_connected_components(shrinking)
        else:
            pieces = nx.number_connected_components(shrinking)
        if pieces == 1:
            collected.append(copy.deepcopy(shrinking))
    collected.append(untouched)

    if not directed_motifs:
        return collected

    # Merge in the undirected graphs whose (sorted) edge list was not seen above.
    # The list of known edge lists is fixed before any graph is appended.
    known_edge_lists = sorted(found.edges() for found in collected)
    for undirected in undirected_results:
        candidate_edges = sorted(undirected.edges())
        if candidate_edges in known_edge_lists:
            continue
        collected.append(undirected.to_directed())
    return collected
def save_network_statistics(g):
    """
    Compute summary statistics of graph ``g`` and write them to
    './network-statistics/twitter-combined-statistics.txt', one
    'name: value' pair per line.

    Statistics that cannot be computed for ``g`` (average clustering, diameter)
    are recorded as None instead of aborting the whole report.

    :param g: networkx graph; the weakly/strongly connected component counts
        are requested, so presumably a directed graph -- confirm with callers.
    :return: None; the result is the written file.
    """
    stats = {}
    stats['num_weakly_connected_components'] = nx.number_weakly_connected_components(g)
    stats['num_strongly_connected_components'] = nx.number_strongly_connected_components(g)
    stats['num_nodes'] = nx.number_of_nodes(g)
    stats['num_edges'] = nx.number_of_edges(g)
    stats['density'] = nx.density(g)
    try:
        stats['avg_clustering_coef'] = nx.average_clustering(g)
    except Exception:
        # Best effort (e.g. not defined for directed graphs). 'Exception' rather
        # than a bare except so Ctrl-C / SystemExit are no longer swallowed.
        stats['avg_clustering_coef'] = None
    # dict(...) accepts both the dict returned by networkx 1.x and the
    # DegreeView returned by networkx >= 2.
    total_degree = sum(dict(g.degree()).values())
    if stats['num_nodes']:
        stats['avg_degree'] = total_degree / float(stats['num_nodes'])
    else:
        # Empty graph: avoid dividing by zero.
        stats['avg_degree'] = 0.0
    stats['transitivity'] = nx.transitivity(g)
    try:
        stats['diameter'] = nx.diameter(g)
    except Exception:
        # unconnected --> infinite path length between connected components
        stats['diameter'] = None

    with open('./network-statistics/twitter-combined-statistics.txt', 'wb') as f:
        for stat_name, stat_value in stats.iteritems():
            f.write(stat_name + ': ' + str(stat_value) + '\n')
def run(filename, gt_file, n_iter):
    """
    Load an overlap graph, annotate it with ground-truth positions, prune and
    condense it, and write two GraphML files next to ``filename``.

    Steps, in order:
      1. Pick the loader from the number of whitespace-separated fields on the
         first line of ``filename`` (``input1`` unless there are exactly two,
         then ``input2``; both are defined elsewhere in this file).
      2. Read ``gt_file``: every line is parsed as integers ``m``; the code
         uses m[0] as a read id, m[1] as a chromosome index and m[2], m[3] as
         two positions.
      3. Store 'st_pc' / 'end_pc' on every edge, drop nodes that lie within
         ``margin`` of either chromosome end, then repeatedly drop nodes with
         in-degree 0 (``n_iter`` rounds) and call ``merge_simple_path`` five times.
      4. Build a plain DiGraph copy ``h`` carrying 'chr', 'count', 'read' and
         (if '<prefix>.mapping.json' can be loaded) alignment attributes.
      5. Write '<prefix>_condensed_annotated.graphml' (h) and
         '<prefix>_G_condensed_annotated.graphml' (g), and print the number of
         weakly / strongly connected components of h.

    Written for Python 2 and the networkx 1.x API (``g.node``, ``g.edge``,
    list-returning ``g.nodes()``).

    :param filename: path of the edge list; its part before the first '.' is
        used as prefix for the mapping file and the outputs.
    :param gt_file: path of the ground-truth file described above.
    :param n_iter: number of rounds of in-degree-0 node removal.
    :return: None. Returns early (after printing the offending reads) if a
        read of a condensed node has no chromosome in the ground truth.
    """

    # Peek at the first line only to decide which loader applies.
    f=open(filename)
    line1=f.readline()
    print line1
    f.close()
    if len(line1.split()) !=2:
        g=input1(filename)
    else:
        g=input2(filename)

    read_to_chr_map={}
    pos_dict = {}
    mapping_dict = {}
    chr_lengths = {}

    # Seed a length of 1000 for chromosome indices 0..13.
    # NOTE(review): a chromosome index >= 14 in gt_file raises KeyError below,
    # because chr_lengths[m[1]] is read before it is written.
    for chr in range(14):
        chr_lengths[chr] = 1000

    with open(gt_file,'r') as f:
        for num, line in enumerate(f.readlines()):
            m = map(int, line.strip().split())
            # mapping_dict[num] = [min(m), max(m), int(m[0]>m[1])]
            read_to_chr_map[m[0]]= str(m[1])
            # mapping_dict and pos_dict are keyed by line number, not by m[0].
            mapping_dict[num] = m[1]
            pos_dict[num] = [min(m[2],m[3]),max(m[2],m[3])]
            # pos_dict[num] = [m[2],m[3],int(m[2]>m[3])]
            # Chromosome length = largest position seen so far (at least 1000).
            chr_lengths[m[1]] = max(chr_lengths[m[1]],max(m[2],m[3]))

    print nx.info(g)
    print "Chromosome lenghts:"
    print chr_lengths

    margin = 10000
    del_count = 0

    #print nx.info(g)
    print "Num reads read : "+str(len(read_to_chr_map))

    for cur_edge in g.edges():
        # Node names look like '<id>_<suffix>'; the integer id is used to index
        # pos_dict / mapping_dict -- assumes ids coincide with gt_file line
        # numbers; TODO confirm.
        node0=int(cur_edge[0].split('_')[0])
        node1=int(cur_edge[1].split('_')[0])

        # g.edge[cur_edge[0]][cur_edge[1]]['st_pc'] = "{0:.2f}".format(1.0*pos_dict[node0][1]/chr_lengths[mapping_dict[node0]])
        # g.edge[cur_edge[0]][cur_edge[1]]['end_pc'] = "{0:.2f}".format(1.0*pos_dict[node1][0]/chr_lengths[mapping_dict[node1]])

        # st_pc is the "start percentage"; i.e., the percent location of edge[0] on its original chromosome
        # end_pc is the "end percentage"; i.e., the percent location of edge[1] on its original chromosome
        # (the stored value is a fraction of the chromosome length, not multiplied by 100)
        g.edge[cur_edge[0]][cur_edge[1]]['st_pc'] = 1.0*pos_dict[node0][1]/chr_lengths[mapping_dict[node0]]
        g.edge[cur_edge[0]][cur_edge[1]]['end_pc'] = 1.0*pos_dict[node1][0]/chr_lengths[mapping_dict[node1]]

    # Removing nodes inside this loop relies on g.nodes() returning a list
    # snapshot (networkx 1.x).
    for node in g.nodes():
        nodeid=int(node.split('_')[0])

        # Drop reads starting within `margin` of the chromosome start ...
        if pos_dict[nodeid][0] < margin:
            g.remove_node(node)
            del_count += 1
            continue

        # ... or ending within `margin` of the chromosome end.
        if pos_dict[nodeid][1] > chr_lengths[mapping_dict[nodeid]] - margin:
            g.remove_node(node)
            del_count += 1
            continue

        # Every surviving node starts as a single read named after itself.
        g.node[node]['count'] = 1
        g.node[node]['read'] = node
        #print str(nodeid), node,g.node[node]['chr']

    print "Deleted nodes: "+str(del_count)

    degree_sequence=sorted(g.degree().values(),reverse=True)
    print Counter(degree_sequence)

    # n_iter rounds of removing nodes that have no incoming edge.
    for i in range(n_iter):
        for node in g.nodes():
            if g.in_degree(node) == 0:
                g.remove_node(node)

        print nx.info(g)
        degree_sequence=sorted(nx.degree(g).values(),reverse=True)
        print Counter(degree_sequence)

    degree_sequence=sorted(nx.degree(g).values(),reverse=True)
    print Counter(degree_sequence)

    # Graph-level attribute; presumably consumed by merge_simple_path -- TODO confirm.
    g.graph['aval'] = 1000000000

    for i in range(5):
        merge_simple_path(g)
        degree_sequence=sorted(nx.degree(g).values(),reverse=True)
        print Counter(degree_sequence)

    # h: same nodes and edges as g, but only the attributes copied explicitly below.
    h=nx.DiGraph()
    h.add_nodes_from(g)
    h.add_edges_from(g.edges())
    for cur_edge in h.edges():
        h.edge[cur_edge[0]][cur_edge[1]]['st_pc'] = g.edge[cur_edge[0]][cur_edge[1]]['st_pc']
        h.edge[cur_edge[0]][cur_edge[1]]['end_pc'] = g.edge[cur_edge[0]][cur_edge[1]]['end_pc']
    # h = g.copy()

    for node in g.nodes():
        # The 'read' attribute is a ':'-separated list of node names.
        reads_in_node=[int(x.split('_')[0]) for x in g.node[node]['read'].split(':')]
        try:
            chr_in_node=map(lambda x: read_to_chr_map[x], reads_in_node)
        except:
            # A read without ground truth aborts the whole run (nothing is written).
            print reads_in_node,g.node[node]['read']
            return
        chr_in_node_set=set(chr_in_node)
        if len(chr_in_node_set) ==1:
            h.node[node]['chr']=chr_in_node[0]
        else:
            # Reads from several chromosomes: keep all of them, ':'-joined.
            h.node[node]['chr']= ':'.join(chr_in_node)

        h.node[node]['count']=g.node[node]['count']
        try:
            h.node[node]['read']=g.node[node]['read']
        except:
            pass

    # Optional alignment annotation; any failure (ujson missing, file missing,
    # malformed entry) silently skips the remainder of this step.
    try:
        import ujson
        mapping = ujson.load(open(filename.split('.')[0]+'.mapping.json'))

        print 'get mapping'

        for node in h.nodes():
            #print node
            if mapping.has_key(node):
                h.node[node]['aln_start'] = mapping[node][0]
                h.node[node]['aln_end'] = mapping[node][1]
                h.node[node]['aln_strand'] = mapping[node][2]
            else:
                h.node[node]['aln_start'] = 0
                h.node[node]['aln_end'] = 0
                h.node[node]['aln_strand'] = 0

    except:
        pass

    nx.write_graphml(h, filename.split('.')[0]+'_condensed_annotated.graphml')
    nx.write_graphml(g, filename.split('.')[0]+'_G_condensed_annotated.graphml')

    print nx.number_weakly_connected_components(h)
    print nx.number_strongly_connected_components(h)
def de_clip(filename, n_nodes, hinge_list,gt_file):
    """
    Sparsify a graph by repeatedly merging randomly chosen nodes that sit on a
    simple path, then write '<prefix>.sparse3.graphml'.

    The graph is loaded with ``input3`` when ``filename`` ends in 'graphml',
    otherwise with ``input1`` / ``input2`` depending on whether the first line
    has exactly two fields (all three loaders are defined elsewhere in this
    file). Nodes are annotated from the JSON ground truth in ``gt_file`` and
    each edge gets a 'false_positive' flag (0 when the two alignment intervals
    overlap according to the test below, 1 otherwise).

    Written for Python 2 and the networkx 1.x API.

    :param filename: graph file; the part before the first '.' prefixes the output.
    :param n_nodes: merging stops once the graph has at most this many nodes.
    :param hinge_list: path of a hinge file or None. With a hinge file, merging
        avoids hinge nodes and false-positive edges and is capped at 10000
        random draws; without one, edges must carry 'hinge_edge' == -1 to be merged.
    :param gt_file: JSON file mapping a read id (node name before '_') to a list
        whose first entry holds two alignment coordinates and a chromosome.
    :return: None.
    """

    n_iter = 5  # not used below

    # Peek at the first line only to decide which loader applies.
    f=open(filename)
    line1=f.readline()
    print line1
    f.close()

    extension = filename.split('.')[-1]

    if extension == 'graphml':
        g=input3(filename)
    elif len(line1.split()) !=2:
        g=input1(filename)
    else:
        g=input2(filename)

    print nx.info(g)
    degree_sequence=sorted(g.degree().values(),reverse=True)
    print Counter(degree_sequence)

    degree_sequence=sorted(nx.degree(g).values(),reverse=True)
    print Counter(degree_sequence)

    try:
        import ujson
        mapping = ujson.load(open(gt_file))

        print 'getting mapping'
        mapped_nodes=0
        print str(len(mapping))
        print str(len(g.nodes()))
        for node in g.nodes():
            # print node
            # Ground truth is keyed by the read id, i.e. the node name before '_'.
            node_base=node.split("_")[0]
            # print node_base

            #print node
            if mapping.has_key(node_base):
                g.node[node]['aln_start'] = min (mapping[node_base][0][0],mapping[node_base][0][1])
                g.node[node]['aln_end'] = max(mapping[node_base][0][1],mapping[node_base][0][0])
                g.node[node]['chr'] = mapping[node_base][0][2]
                mapped_nodes+=1
            else:
                # pass
                # Unmapped nodes get an empty [0, 0] interval (and no 'chr').
                g.node[node]['aln_start'] = 0
                g.node[node]['aln_end'] = 0
                g.node[node]['aln_strand'] = 0

        for edge in g.edges_iter():
            in_node=edge[0]
            out_node=edge[1]
            # print 'akjdfakjhfakljh'
            # The edge counts as genuine when the start or the end of out_node's
            # interval lies strictly inside in_node's interval.
            # NOTE(review): an out_node interval that fully contains in_node's
            # interval is flagged as false positive by this test -- confirm intended.
            if ((g.node[in_node]['aln_start'] < g.node[out_node]['aln_start'] and g.node[out_node]['aln_start'] < g.node[in_node]['aln_end']) or (g.node[in_node]['aln_start'] < g.node[out_node]['aln_end'] and g.node[out_node]['aln_end'] < g.node[in_node]['aln_end'])):
                g.edge[in_node][out_node]['false_positive']=0
            else:
                g.edge[in_node][out_node]['false_positive']=1

    except:
        # Errors are re-raised unchanged; the try block only marks the section.
        raise
        # print "json "+filename.split('.')[0]+'.mapping.json'+" not found. exiting."

    print hinge_list

    print str(mapped_nodes)+" out of " +str(len(g.nodes()))+" nodes mapped."

    # for i in range(5):
    #     merge_simple_path(g)
    #     degree_sequence=sorted(nx.degree(g).values(),reverse=True)
    #     print Counter(degree_sequence)

    in_hinges = set()
    out_hinges = set()
    num_iter=10000   # cap on random draws in the hinge-aware branch
    iter_done=0
    if hinge_list != None:
        print "Found hinge list."
        with open(hinge_list,'r') as f:
            for lines in f:
                lines1=lines.split()

                # Third field '1' / '-1' decides which orientation ('_0' or '_1')
                # of the read is recorded as in-hinge and which as out-hinge.
                if lines1[2] == '1':
                    in_hinges.add(lines1[0]+'_0')
                    out_hinges.add(lines1[0]+'_1')
                elif lines1[2] == '-1':
                    in_hinges.add(lines1[0]+'_1')
                    out_hinges.add(lines1[0]+'_0')

        print str(len(in_hinges))+' hinges found.'

        # Node attribute 'hinge': 100 = in and out hinge, 10 = in, -10 = out, 0 = none.
        for node in g.nodes():
            if node in in_hinges and node in out_hinges:
                g.node[node]['hinge']=100
            elif node in in_hinges:
                g.node[node]['hinge']=10
            elif node in out_hinges:
                g.node[node]['hinge']=-10
            else:
                g.node[node]['hinge']=0

        while len(g.nodes()) > n_nodes and iter_done < num_iter :
            # Draw a random node; g.nodes() is a list in networkx 1.x.
            node = g.nodes()[random.randrange(len(g.nodes()))]
            iter_done+=1
            # print iter_done

            # Only nodes with exactly one predecessor and one successor are candidates.
            if g.in_degree(node) == 1 and g.out_degree(node) == 1:

                base_node=node.split("_")[0]
                orintation = node.split("_")[1]
                # if orintation=='1':
                #     node2=base_node+'_0'
                # else:
                #     node2=base_node+'_1'

                # print node,node2

                in_node = g.in_edges(node)[0][0]
                out_node = g.out_edges(node)[0][1]

                # None of the three nodes may be a hinge ...
                if g.node[node]['hinge']==0 and g.node[in_node]['hinge']==0 and g.node[out_node]['hinge']==0:
                    # ... the neighbours must themselves lie on the simple path ...
                    if g.out_degree(in_node) == 1 and g.in_degree(out_node) == 1:
                        # ... and the three nodes must be distinct.
                        if in_node != node and out_node != node and in_node != out_node:
                            bad_node=False
                            # print g.in_edges(node)
                            # print g.edge[g.in_edges(node)[0][0]][g.in_edges(node)[0][1]]
                            # print g.out_edges(node)
                            # Never merge across an edge flagged as false positive.
                            for in_edge in g.in_edges(node):
                                if g.edge[in_edge[0]][in_edge[1]]['false_positive']==1:
                                    bad_node=True
                            for out_edge in g.out_edges(node):
                                if g.edge[out_edge[0]][out_edge[1]]['false_positive']==1:
                                    bad_node=True
                            if not bad_node:
                                #print in_node, node, out_node
                                merge_path(g,in_node,node,out_node)

                                # print g.edge[edge1[0]][edge1[1]]['hinge_edge']

                                # Sanity check: report node names without any '_'.
                                for nd in g.nodes():
                                    if len(nd.split("_"))==1:
                                        print nd + " in trouble"

                # in_node = g.in_edges(node2)[0][0]
                # out_node = g.out_edges(node2)[0][1]
                # if g.node[node2]['hinge']==0 and g.node[in_node]['hinge']==0 and g.node[out_node]['hinge']==0:
                #     if g.out_degree(in_node) == 1 and g.in_degree(out_node) == 1:
                #         if in_node != node2 and out_node != node2 and in_node != out_node:
                #             bad_node=False
                #             for in_edge in g.in_edges(node2):
                #                 if g.edge[in_edge]==1:
                #                     bad_node=True
                #             for out_edge in g.out_edges(node2):
                #                 if g.edge[out_edge]==1:
                #                     bad_node=True
                #             if not bad_node:
                #                 #print in_node, node, out_node
                #                 merge_path(g,in_node,node2,out_node)

                # for nd in g.nodes():
                #     print nd

    else:
        # No hinge file: rely on the 'hinge_edge' edge attribute instead.
        # NOTE(review): unlike the branch above this loop has no draw limit; it
        # does not terminate if no mergeable node remains while the graph still
        # has more than n_nodes nodes.
        while len(g.nodes()) > n_nodes:

            node = g.nodes()[random.randrange(len(g.nodes()))]

            if g.in_degree(node) == 1 and g.out_degree(node) == 1:

                # assert g.in_degree(node2) == 1 and g.out_degree(node2) == 1
                # edge_1 = g.out_edges(node)[0]
                # edge_2 = g.in_edges(node)[0]

                edge1 = g.out_edges(node)[0]
                edge2 = g.in_edges(node)[0]

                # print g.edge[edge1[0]][edge1[1]]['hinge_edge']

                # Both incident edges must be marked -1 in 'hinge_edge'.
                if (g.edge[edge1[0]][edge1[1]]['hinge_edge'] == -1 and g.edge[edge2[0]][edge2[1]]['hinge_edge'] == -1):

                    in_node = g.in_edges(node)[0][0]
                    out_node = g.out_edges(node)[0][1]
                    if g.out_degree(in_node) == 1 and g.in_degree(out_node) == 1:
                        if in_node != node and out_node != node and in_node != out_node:
                            #print in_node, node, out_node
                            merge_path(g,in_node,node,out_node)

    degree_sequence=sorted(nx.degree(g).values(),reverse=True)
    print Counter(degree_sequence)

    nx.write_graphml(g, filename.split('.')[0]+'.sparse3.graphml')

    print nx.number_weakly_connected_components(g)
    print nx.number_strongly_connected_components(g)
def n_components(self):
    """Return how many weakly connected components this graph has."""
    component_count = networkx.number_weakly_connected_components(self)
    return component_count
if to_node.is_white: G.add_node(to_node_position, side="WHITE") elif to_node.is_black: G.add_node(to_node_position, side="BLACK") except AttributeError: G.add_node(to_node_position, side="UNDEFINED") if to_node == None: G.add_edge(from_node_position, to_node_position, weight=0.0) elif i.is_opponent(to_node): G.add_edge(from_node_position, to_node_position, weight=-1.0) else: G.add_edge(from_node_position, to_node_position, weight=1.0) fout.write(move + '\t') fout.write(str(side) + '\t') fout.write(str(nx.number_strongly_connected_components(G)) + '\t') fout.write(str(nx.number_weakly_connected_components(G)) + '\t') try: fout.write(str(nx.average_shortest_path_length(G)) + '\t') except: fout.write("Not connected" + '\t') fout.write(str(side == winner) + '\n') ''' edefensive=[(u,v) for (u,v,d) in G.edges(data=True) if d['weight'] >= 0.0] #eoffensive=[(u,v) for (u,v,d) in G.edges(data=True) if d['weight'] <= 0.0] pos=nx.circular_layout(G) # positions for all nodes # nodes if side == 1: nx.draw_networkx_nodes(G,pos,nodelist=[node for (node,d) in G.nodes(data=True) if d['side'] == "BLACK"], node_color='blue',node_size=300) nx.draw_networkx_nodes(G,pos,nodelist=[node for (node,d) in G.nodes(data=True) if d['side'] == "UNDEFINED"], node_color='blue',node_size=300)
# Load the e-mail network as a directed graph with integer node ids.
G = nx.read_edgelist('Email-EuAll.txt', nodetype=int, create_using=nx.DiGraph())

# --- Section header: network properties ---
print('')
print("Properti jaringan")
print("-----------------")
print('')

# Basic size figures: node count, edge count and edges per node.
N = G.order()
K = G.size()
avg_deg = float(K) / N

print('Jumlah node: %d' % N)
print('Jumlah edge: %d' % K)
print('Rata-rata edge: %.3f' % avg_deg)

strong_total = nx.number_strongly_connected_components(G)
print('Jumlah strongly connected component: %d' % strong_total)
weak_total = nx.number_weakly_connected_components(G)
print('Jumlah weakly connected component: %d' % weak_total)

# --- Section header: degree distribution ---
print('')
print('Distribusi degree')
print('-----------------')
print('')

# Histogram of in-degrees: the distinct degree values and how many nodes have each.
in_degrees = G.in_degree()
in_degree_list = in_degrees.values()
in_values = sorted(set(in_degree_list))
in_hist = [in_degree_list.count(x) for x in in_values]

# Same for out-degrees.
out_degrees = G.out_degree()
out_degree_list = out_degrees.values()
out_values = sorted(set(out_degree_list))
out_hist = [out_degree_list.count(x) for x in out_values]

# Announce plotting and prepare an empty figure with a grid.
print('Pembuatan grafik...')
plt.figure()
plt.grid(True)
#time_vector.sort() sorted_time_d = sorted(time_d.items(),key=operator.itemgetter(1)) name_vector = [ele[0] for ele in sorted_time_d] #only includes rters time_vector = [ele[1] for ele in sorted_time_d] N_t = len(time_vector) N_ut = number_of_unique_nodes - N_t #print time_d.values() #continue #edges = [(srcs[j],rters[j]) for j in xrange(len(srcs))] subgraph = nx.DiGraph() subgraph.add_edges_from(edges) if nx.number_weakly_connected_components(subgraph) == 1: #print time_vector #print N_t,N_ut cnt += 1 result = my_EM(N_t,N_ut,time_vector) named_result = {} unknown_result = {} sorted_result = sorted(result.items(),key=operator.itemgetter(1),reverse=True) #indexed_gnd_truth = sorted(gnd_truth.items()[0],gnd_truth.items()[1]) #evaluate top k nodes acc, pick 2K candidates #K = [1:6]
import json
from util.read_utils import lines_per_n
import community
import networkx as nx

# Directed graph of mail authors: an edge From -> recipient for every message,
# drawn solid for 'To' recipients and dashed for 'Cc' recipients, labelled
# with the message time.
author_graph = nx.DiGraph()

# clean_data.json is consumed in chunks of 9 lines, each chunk being one JSON
# document with (at least) the keys 'From', 'To', 'Cc' and 'Time'.
with open('clean_data.json', 'r') as jfile:
    for chunk in lines_per_n(jfile, 9):
        hdr_data = json.loads(chunk)
        for to_addr in str(hdr_data['To']).split(","):
            if '@' in to_addr:
                author_graph.add_edge(str(hdr_data['From']), to_addr.strip(), style='solid', label=hdr_data['Time'])
        for cc_addr in str(hdr_data['Cc']).split(","):
            # Validate the Cc entry itself. The previous check looked at the
            # last 'To' address, so Cc entries were accepted or rejected based
            # on an unrelated value.
            if '@' in cc_addr:
                author_graph.add_edge(str(hdr_data['From']), cc_addr.strip(), style='dashed', label=hdr_data['Time'])
# The with-statement has already closed the file; no explicit close() is needed.

print("No. of Weakly Connected Components:", nx.number_weakly_connected_components(author_graph))
print("No. of Strongly Connected Components:", nx.number_strongly_connected_components(author_graph))
print("Nodes:", nx.number_of_nodes(author_graph))
print("Edges:", nx.number_of_edges(author_graph))

# The following lines generate a dendrogram for the above graph (community
# detection runs on the undirected version) and print the partition at each level.
dendo = community.generate_dendogram(author_graph.to_undirected())
for level in range(len(dendo)):
    print("Partition at level", level, "is", community.partition_at_level(dendo, level))
    print("-"*10)
def mask_test_edges_directed(adj, test_frac=.1, val_frac=.05, prevent_disconnect=True, verbose=False, false_edge_sampling='iterative'): if verbose == True: print 'preprocessing...' # Remove diagonal elements adj = adj - sp.dia_matrix((adj.diagonal()[np.newaxis, :], [0]), shape=adj.shape) adj.eliminate_zeros() # Check that diag is zero: assert np.diag(adj.todense()).sum() == 0 # Convert to networkx graph to calc num. weakly connected components g = nx.from_scipy_sparse_matrix(adj, create_using=nx.DiGraph()) orig_num_wcc = nx.number_weakly_connected_components(g) adj_tuple = sparse_to_tuple(adj) # (coords, values, shape) edges = adj_tuple[0] # List of ALL edges (either direction) edge_pairs = [(edge[0], edge[1]) for edge in edges] # store edges as list of tuples (from_node, to_node) num_test = int(np.floor(edges.shape[0] * test_frac)) # controls how large the test set should be num_val = int(np.floor(edges.shape[0] * val_frac)) # controls how alrge the validation set should be num_train = len(edge_pairs) - num_test - num_val # num train edges all_edge_set = set(edge_pairs) train_edges = set(edge_pairs) # init train_edges to have all edges test_edges = set() # init test_edges as empty set val_edges = set() # init val edges as empty set ### ---------- TRUE EDGES ---------- ### # Shuffle and iterate over all edges np.random.shuffle(edge_pairs) # get initial bridge edges bridge_edges = set(nx.bridges(nx.to_undirected(g))) if verbose: print('creating true edges...') for ind, edge in enumerate(edge_pairs): node1, node2 = edge[0], edge[1] # Recalculate bridges every ____ iterations to relatively recent if ind % 10000 == 0: bridge_edges = set(nx.bridges(nx.to_undirected(g))) # Don't sample bridge edges to increase likelihood of staying connected if (node1, node2) in bridge_edges or (node2, node1) in bridge_edges: continue # If removing edge would disconnect the graph, backtrack and move on g.remove_edge(node1, node2) if prevent_disconnect == True: if not 
nx.is_weakly_connected(g): g.add_edge(node1, node2) continue # Fill test_edges first if len(test_edges) < num_test: test_edges.add(edge) train_edges.remove(edge) if len(test_edges) % 10000 == 0 and verbose == True: print 'Current num test edges: ', len(test_edges) # Then, fill val_edges elif len(val_edges) < num_val: val_edges.add(edge) train_edges.remove(edge) if len(val_edges) % 10000 == 0 and verbose == True: print 'Current num val edges: ', len(val_edges) # Both edge lists full --> break loop elif len(test_edges) == num_test and len(val_edges) == num_val: break # Check that enough test/val edges were found if (len(val_edges) < num_val or len(test_edges) < num_test): print "WARNING: not enough removable edges to perform full train-test split!" print "Num. (test, val) edges requested: (", num_test, ", ", num_val, ")" print "Num. (test, val) edges returned: (", len(test_edges), ", ", len(val_edges), ")" # Print stats for largest remaining WCC print 'Num WCC: ', nx.number_weakly_connected_components(g) largest_wcc_set = max(nx.weakly_connected_components(g), key=len) largest_wcc = g.subgraph(largest_wcc_set) print 'Largest WCC num nodes: ', largest_wcc.number_of_nodes() print 'Largest WCC num edges: ', largest_wcc.number_of_edges() if prevent_disconnect == True: assert nx.number_weakly_connected_components(g) == orig_num_cc # Fraction of edges with both endpoints in largest WCC def frac_edges_in_wcc(edge_set): num_wcc_contained_edges = 0.0 num_total_edges = 0.0 for edge in edge_set: num_total_edges += 1 if edge[0] in largest_wcc_set and edge[1] in largest_wcc_set: num_wcc_contained_edges += 1 frac_in_wcc = num_wcc_contained_edges / num_total_edges return frac_in_wcc # Check what percentage of edges have both endpoints in largest WCC print 'Fraction of train edges with both endpoints in L-WCC: ', frac_edges_in_wcc(train_edges) print 'Fraction of test edges with both endpoints in L-WCC: ', frac_edges_in_wcc(test_edges) print 'Fraction of val edges with both endpoints 
in L-WCC: ', frac_edges_in_wcc(val_edges) # Ignore edges with endpoint not in largest WCC print 'Removing edges with either endpoint not in L-WCC from train-test split...' train_edges = {edge for edge in train_edges if edge[0] in largest_wcc_set and edge[1] in largest_wcc_set} test_edges = {edge for edge in test_edges if edge[0] in largest_wcc_set and edge[1] in largest_wcc_set} val_edges = {edge for edge in val_edges if edge[0] in largest_wcc_set and edge[1] in largest_wcc_set} ### ---------- FALSE EDGES ---------- ### # Initialize empty sets train_edges_false = set() test_edges_false = set() val_edges_false = set() # Generate candidate false edges (from g-complement) and iterate through them if false_edge_sampling == 'iterative': if verbose == True: print 'preparing complement adjacency matrix...' # Sample false edges from G-complement, instead of randomly generating edges # g_complement = nx.complement(g) adj_complement = 1 - adj.toarray() # flip 0's, 1's in adjacency matrix np.fill_diagonal(adj_complement, val=0) # set diagonals to 0 # 2 numpy arrays indicating x, y coords in adj_complement # WARNING: This line can use up a lot of RAM depending on 'adj' size idx1, idx2 = np.where(adj_complement == 1) edges_false = np.stack((idx1, idx2), axis=-1) # stack arrays into coord pairs. edge_pairs_false = [(edge[0], edge[1]) for false_edge in edges_false] # Shuffle and iterate over false edges np.random.shuffle(edge_pairs_false) if verbose == True: print 'adding candidate false edges to false edge sets...' 
for false_edge in edge_pairs_false: # Fill train_edges_false first if len(train_edges_false) < len(train_edges): train_edges_false.add(false_edge) if len(train_edges_false) % 100000 == 0 and verbose == True: print 'Current num false train edges: ', len(train_edges_false) # Fill test_edges_false next elif len(test_edges_false) < len(test_edges): test_edges_false.add(false_edge) if len(test_edges_false) % 100000 == 0 and verbose == True: print 'Current num false test edges: ', len(test_edges_false) # Fill val_edges_false last elif len(val_edges_false) < len(val_edges): val_edges_false.add(false_edge) if len(val_edges_false) % 100000 == 0 and verbose == True: print 'Current num false val edges: ', len(val_edges_false) # All sets filled --> break elif len(train_edges_false) == len(train_edges) and \ len(test_edges_false) == len(test_edges) and \ len(val_edges_false) == len(val_edges): break # Randomly generate false edges (idx_i, idx_j) 1 at a time to save memory elif false_edge_sampling == 'random': if verbose == True: print 'creating false test edges...' # FALSE TEST EDGES while len(test_edges_false) < len(test_edges): idx_i = np.random.randint(0, adj.shape[0]) idx_j = np.random.randint(0, adj.shape[0]) if idx_i == idx_j: # no self-loops continue # Ensure both endpoints are in largest WCC if idx_i not in largest_wcc_set or idx_j not in largest_wcc_set: continue false_edge = (idx_i, idx_j) # Make sure false_edge not an actual edge, and not a repeat if false_edge in all_edge_set: continue if false_edge in test_edges_false: continue test_edges_false.add(false_edge) if len(test_edges_false) % 100000 == 0 and verbose == True: print 'Current num false test edges: ', len(test_edges_false) # FALSE VAL EDGES if verbose == True: print 'creating false val edges...' 
while len(val_edges_false) < len(val_edges): idx_i = np.random.randint(0, adj.shape[0]) idx_j = np.random.randint(0, adj.shape[0]) if idx_i == idx_j: continue false_edge = (idx_i, idx_j) # Make sure false_edge in not an actual edge, not in test_edges_false, not a repeat if false_edge in all_edge_set or \ false_edge in test_edges_false or \ false_edge in val_edges_false: continue val_edges_false.add(false_edge) if len(val_edges_false) % 100000 == 0 and verbose == True: print 'Current num false val edges: ', len(val_edges_false) # FALSE TRAIN EDGES if verbose == True: print 'creating false train edges...' while len(train_edges_false) < len(train_edges): idx_i = np.random.randint(0, adj.shape[0]) idx_j = np.random.randint(0, adj.shape[0]) if idx_i == idx_j: continue false_edge = (idx_i, idx_j) # Make sure false_edge in not an actual edge, not in test_edges_false, # not in val_edges_false, not a repeat if false_edge in all_edge_set or \ false_edge in test_edges_false or \ false_edge in val_edges_false or \ false_edge in train_edges_false: continue train_edges_false.add(false_edge) if len(train_edges_false) % 100000 == 0 and verbose == True: print 'Current num false train edges: ', len(train_edges_false) ### ---------- FINAL DISJOINTNESS CHECKS ---------- ### if verbose == True: print 'final checks for disjointness...' # assert: false_edges are actually false (not in all_edge_tuples) assert test_edges_false.isdisjoint(all_edge_set) assert val_edges_false.isdisjoint(all_edge_set) assert train_edges_false.isdisjoint(all_edge_set) # assert: test, val, train false edges disjoint assert test_edges_false.isdisjoint(val_edges_false) assert test_edges_false.isdisjoint(train_edges_false) assert val_edges_false.isdisjoint(train_edges_false) # assert: test, val, train positive edges disjoint assert val_edges.isdisjoint(train_edges) assert test_edges.isdisjoint(train_edges) assert val_edges.isdisjoint(test_edges) if verbose == True: print 'creating adj_train...' 
# Re-build adj matrix using remaining graph adj_train = nx.adjacency_matrix(g) # Convert edge-lists to numpy arrays train_edges = np.array([list(edge_tuple) for edge_tuple in train_edges]) train_edges_false = np.array([list(edge_tuple) for edge_tuple in train_edges_false]) val_edges = np.array([list(edge_tuple) for edge_tuple in val_edges]) val_edges_false = np.array([list(edge_tuple) for edge_tuple in val_edges_false]) test_edges = np.array([list(edge_tuple) for edge_tuple in test_edges]) test_edges_false = np.array([list(edge_tuple) for edge_tuple in test_edges_false]) if verbose == True: print 'Done with train-test split!' print 'Num train edges (true, false): (', train_edges.shape[0], ', ', train_edges_false.shape[0], ')' print 'Num test edges (true, false): (', test_edges.shape[0], ', ', test_edges_false.shape[0], ')' print 'Num val edges (true, false): (', val_edges.shape[0], ', ', val_edges_false.shape[0], ')' print '' # Return final edge lists (edges can go either direction!) return adj_train, train_edges, train_edges_false, \ val_edges, val_edges_false, test_edges, test_edges_false
#time_vector.sort() sorted_time_d = sorted(time_d.items(),key=operator.itemgetter(1)) name_vector = [ele[0] for ele in sorted_time_d] #only includes rters with exact time time_vector = [ele[1] for ele in sorted_time_d] N_t = len(time_vector) N_ut = number_of_unique_nodes - N_t #cascade_cent_array = centrality_look_up(ind_d,cent_df,unique_nodes.keys()) #print time_d.values() #continue #edges = [(srcs[j],rters[j]) for j in xrange(len(srcs))] subgraph = nx.DiGraph() subgraph.add_edges_from(edges) number_of_weakly_cc = nx.number_weakly_connected_components(subgraph) if nx.number_weakly_connected_components(subgraph) <= 10: #need to handle nodes without gnd_truth #print time_vector #print N_t,N_ut cnt += 1 #result = my_EM(N_t,N_ut,time_vector) #result = centrality_look_up(ind_d,cent_df,name_vector)[0] unique_nodes_list = unique_nodes.keys() cent_inds = centrality_look_up(ind_d,cent_df,unique_nodes_list)[0] cent_names = [unique_nodes_list[ind] for ind in cent_inds]
def de_clip(filename, n_iter):
    """
    Build a multi-digraph from an overlap file, prune low-degree nodes,
    condense simple paths and export the result.

    Outputs, all named after the part of ``filename`` before the first '.':
    '<prefix>.graphml' and '<prefix>.gfa'. The N50 of the node lengths is
    printed at the end (``comp_n50`` and ``merge_simple_path`` are defined
    elsewhere in this file).

    Written for Python 2 (print statements, ``xreadlines``, ``has_key``) and
    the networkx 1.x API.

    :param filename: whitespace-separated overlap file; fields 0 and 1 are the
        two node names, field 2 an overlap value, fields 7-10 carry start/end
        coordinates of the two nodes.
    :param n_iter: number of rounds of removing nodes with degree < 2.
    :return: None.
    """

    g = nx.MultiDiGraph()

    # count = 0
    with open(filename,'r') as f:
        for line in f.xreadlines():
            l = line.strip().split()
            #print l2
            # Half of field 2 is stored as 'overlap' (integer division under Python 2).
            g.add_edge(l[0],l[1],overlap=int(l[2])/2)

            # if count < 10:
            #     print l[0], l[1], l[2]
            #     count += 1

            # Fields 7/8 and 9/10 lose their first / last character before
            # conversion -- presumably a bracket around a 'start end' pair;
            # TODO confirm against the input format.
            node0start = int(l[7][1:])
            node0end = int(l[8][:-1])
            g.node[l[0]]['length'] = node0end - node0start

            node1start = int(l[9][1:])
            node1end = int(l[10][:-1])
            g.node[l[1]]['length'] = node1end - node1start

    print nx.info(g)

    # Optional strand annotation from '<prefix>.mapping.json'; any failure
    # (ujson missing, file missing, short entries) silently skips the rest of
    # this step.
    try:
        import ujson
        mapping = ujson.load(open(filename.split('.')[0]+'.mapping.json'))

        # print mapping

        print 'get mapping'

        for node in g.nodes():
            #print node
            if mapping.has_key(node):

                # alnstart = int(mapping[node][0])
                # alnend = int(mapping[node][1])

                # g.node[node]['length'] = abs(alnend-alnstart)
                # print abs(alnend-alnstart)

                g.node[node]['aln_strand'] = mapping[node][3]

                # g.node[node]['aln_start'] = mapping[node][0]
                # g.node[node]['aln_end'] = mapping[node][1]
                # g.node[node]['aln_strand'] = mapping[node][2]

            else:
                # g.node[node]['length'] = 5000
                # 5 is the value stored for nodes absent from the mapping.
                g.node[node]['aln_strand'] = 5
                # print "this happened"

                # g.node[node]['aln_start'] = 0
                # g.node[node]['aln_end'] = 0
                # g.node[node]['aln_strand'] = 0

    except:
        pass

    degree_sequence=sorted(g.degree().values(),reverse=True)
    print Counter(degree_sequence)

    # n_iter rounds of removing nodes with total degree below 2. Removing while
    # looping relies on g.nodes() returning a list snapshot (networkx 1.x).
    for i in range(n_iter):
        for node in g.nodes():
            if g.degree(node) < 2:
                g.remove_node(node)

        print nx.info(g)
        degree_sequence=sorted(nx.degree(g).values(),reverse=True)
        print Counter(degree_sequence)

    degree_sequence=sorted(nx.degree(g).values(),reverse=True)
    print Counter(degree_sequence)

    # Graph-level attribute; presumably consumed by merge_simple_path -- TODO confirm.
    g.graph['aval'] = 1000000000

    for i in range(5):
        merge_simple_path(g)
        degree_sequence=sorted(nx.degree(g).values(),reverse=True)
        print Counter(degree_sequence)

    nx.write_graphml(g, filename.split('.')[0]+'.graphml')

    print nx.number_weakly_connected_components(g)
    print nx.number_strongly_connected_components(g)

    # Next we create the gfa file
    # NOTE(review): the records written are 'NODE' / 'ARC' lines, not GFA
    # 'S' / 'L' records, despite the '.gfa' extension.
    outputfile = filename.split('.')[0]+'.gfa'
    with open(outputfile, 'w') as fout:
        for cur_node in g.nodes():
            node_length = g.node[cur_node]['length']
            # Placeholder sequence: 'A' repeated 'length' times.
            node_str = 'A'*node_length
            node_str = node_str + '\n'
            fout.write("NODE "+str(cur_node)+' 0 0 0 0 0\n')
            # The sequence line is written twice per node.
            fout.write(node_str)
            fout.write(node_str)
            # print "NODE "+str(node)

        for arc in g.edges():
            fout.write("ARC "+str(arc[0])+' '+str(arc[1])+' 0\n')

    # Compute N50
    contig_lengths = []
    for cur_node in g.nodes():
        contig_lengths.append(g.node[cur_node]['length'])

    print "N50 = "+str(comp_n50(contig_lengths))
import matplotlib.pyplot as plt
from math import log

##################################
######### READ EDGE LIST #########
##################################

print('Reading edgelist')

# Read combined edge-list
twitter_edges_dir = './twitter/twitter_combined.txt'

# Parse edgelist into directed graph.
# read_edgelist expects file objects opened in binary mode, and the handle is
# closed as soon as parsing is done instead of being left open.
with open(twitter_edges_dir, 'rb') as edges_f:
    twitter_g = nx.read_edgelist(edges_f, nodetype=int, create_using=nx.DiGraph())
print('Num. weakly connected components: ', nx.number_weakly_connected_components(twitter_g))

# Get adjacency matrix
adj = nx.adjacency_matrix(twitter_g)

# Save adjacency matrix
with open('./twitter/twitter-combined-adj.pkl', 'wb') as f:
    pickle.dump(adj, f)

##################################
##### VISUALIZATIONS, STATS ######
##################################
if len(g)>=size_of_component: uu+=1 if uu>=number_of_components: break else: counte+=1 continue print str(" ") print G.name print str(" ") G.remove_nodes_from(nx.isolates(G)) print 'Number of strongly connected components:', nx.number_strongly_connected_components(G) print 'Number of weakly connected components:', nx.number_weakly_connected_components(G) print str(" ") print 'Number of unilaterally connected components:', len(ucc) print str(" ") print 'Unilaterally connected components (UCC):' for i in range(len(ucc)): print 'UCC', str(i+1)+':', ucc[i] print str(" ") print 'Edges in unilaterally connected components:' for i in range(len(ucce)): print 'Edges in UCC', str(i+1)+':', ucce[i] print str(" ")
def run(filename, gt_file, n_iter):
    """Condense a read graph and annotate each node with chromosome labels.

    Steps, in order:
    1. Load the graph with ``input1`` or ``input2``; ``input2`` is used when
       the first line of *filename* has exactly two whitespace-separated
       fields.
    2. Drop weakly connected components with fewer than 10 nodes.
    3. Build a read-id -> chromosome map from *gt_file*.  A file whose
       extension is ``json`` is read as ``{read: [[_, _, chr, ...], ...]}``
       (only ``[0][2]`` is used); any other file is read as whitespace
       separated integers with the read id in column 0 and the chromosome in
       column 1.  Reads present in the graph but not in the map get ``-1``.
    4. Repeat ``n_iter`` times: remove nodes with in-degree 0.
    5. Call ``merge_simple_path`` five times.
    6. Copy nodes/edges into a plain ``DiGraph`` and set ``chr``, ``count``
       and ``read`` on every node, then write
       ``<prefix>_condensed_annotated.graphml`` where ``<prefix>`` is the
       part of *filename* before its first ``'.'``.

    Node names are expected to look like ``<int>_<suffix>``; a node's
    ``read`` attribute is parsed as ``:``-separated node names.

    Returns:
        None.  If a read id has no chromosome entry the offending ids are
        printed and the function returns early without writing the graph.
    """
    def show_degree_counts():
        # Histogram of node degrees, printed after every pruning/merging step.
        print(Counter(sorted(nx.degree(g).values(), reverse=True)))

    with open(filename) as f:
        line1 = f.readline()
    print(line1)

    if len(line1.split()) != 2:
        g = input1(filename)
    else:
        g = input2(filename)

    print(str(len(g.nodes())) + " vertices in graph to begin with.")

    # Materialise the components first: the graph is mutated below.
    connected_components = list(nx.weakly_connected_components(g))
    for component in connected_components:
        # NOTE(review): the cut-off here is a literal 10 ("fewer than 10"),
        # while the message below reports LENGTH_THRESHOLD ("at most") --
        # confirm which one is intended.
        if len(component) < 10:
            g.remove_nodes_from(component)

    print(str(len(g.nodes())) + " vertices in graph after removing components of at most " + str(LENGTH_THRESHOLD) + " nodes.")

    read_to_chr_map = {}
    if gt_file.split('.')[-1] == 'json':
        with open(gt_file, 'r') as f:
            tmp_map = json.load(f)
        for read in tmp_map:
            readid = int(read.strip("'"))
            read_to_chr_map[readid] = int(tmp_map[read][0][2])
    else:
        with open(gt_file, 'r') as f:
            for line in f:
                m = [int(tok) for tok in line.split()]
                if not m:
                    # Tolerate blank lines.
                    continue
                read_to_chr_map[m[0]] = m[1]

    # Reads that appear in the graph but not in the ground truth get -1.
    nodes_seen = set(x.split("_")[0] for x in g.nodes())
    for node in nodes_seen:
        read_to_chr_map.setdefault(int(node), -1)

    print("Num reads read : " + str(len(read_to_chr_map)))

    for node in g.nodes():
        g.node[node]['count'] = 1
        g.node[node]['read'] = node

    show_degree_counts()

    for _ in range(n_iter):
        # Snapshot the node list: the graph is mutated inside the loop.
        for node in list(g.nodes()):
            if g.in_degree(node) == 0:
                g.remove_node(node)
        print(nx.info(g))
        show_degree_counts()

    show_degree_counts()

    # Read by merge_simple_path -- presumably a counter/id seed; TODO confirm.
    g.graph['aval'] = 1000000000

    for _ in range(5):
        merge_simple_path(g)
        show_degree_counts()

    h = nx.DiGraph()
    h.add_nodes_from(g)
    h.add_edges_from(g.edges())

    for node in g.nodes():
        read_label = g.node[node]['read']
        reads_in_node = [int(x.split('_')[0]) for x in read_label.split(':')]
        try:
            chr_in_node = [read_to_chr_map[x] for x in reads_in_node]
        except KeyError:
            print('%s %s' % (reads_in_node, read_label))
            return

        if len(set(chr_in_node)) == 1:
            h.node[node]['chr'] = chr_in_node[0]
        else:
            h.node[node]['chr'] = ':'.join(map(str, chr_in_node))

        h.node[node]['count'] = g.node[node]['count']
        h.node[node]['read'] = read_label

    nx.write_graphml(h, filename.split('.')[0] + '_condensed_annotated.graphml')

    print(nx.number_weakly_connected_components(h))
    print(nx.number_strongly_connected_components(h))
# Add an edge for every "source;target" row whose two endpoints are already
# nodes of discussion_graph carrying a 'sender' attribute; other rows are
# silently ignored.
with open("graph_edges.csv", "r") as edge_file:
    for pair in edge_file:
        edge = pair.split(';')
        edge[1] = edge[1].strip()
        endpoints_known = all(
            'sender' in discussion_graph.node.get(endpoint, {})
            for endpoint in edge[:2]
        )
        if endpoints_known:
            discussion_graph.add_edge(*edge)
# No explicit close(): the with-statement has already closed the file.
print("Edges added.")

print("No. of Nodes: ", nx.number_of_nodes(discussion_graph))
print("No. of Edges: ", nx.number_of_edges(discussion_graph))
print("No. of Weakly Connected Components: ", nx.number_weakly_connected_components(discussion_graph))

# Uncomment the lines below to save the graph as a GEXF file
# nx.write_gexf(discussion_graph, "gexf/master_disc_graph.gexf")
# print("GEXF file generated.")

# Uncomment the lines below to read the graph from a GEXF file
# discussion_graph = nx.read_gexf("gexf/master_disc_graph.gexf", node_type=int)
# print("Graph loaded from GEXF file.")

# Write each weakly connected component to its own GEXF file, named after
# the smallest node id (as an integer) in that component.
for conn_subgraph in nx.weakly_connected_component_subgraphs(discussion_graph):
    sender_color_map = {}
    node_list = [int(x) for x in conn_subgraph.nodes()]
    # Comment the respective lines below to only save in the required formats
    nx.write_gexf(conn_subgraph, 'gexf/' + str(min(node_list)) + '.gexf')
listofgraphs.append((G, int(number))) number = line.strip().split()[1] edgelist = [] else: edgelist.append(line.rstrip()) f.close() G = nx.read_edgelist(edgelist, create_using=nx.DiGraph()) listofgraphs.append((G, int(number))) F = None for (G, number) in listofgraphs: for i in range(number): if F == None: F = G.copy() else: F = nx.disjoint_union(F,G) while nx.number_weakly_connected_components(F) > 1: addRandomEdge(F) F = nx.convert_node_labels_to_integers(F,1) f2 = open(sys.argv[1] + '.OUT', 'wb') f2.write(str(len(F.nodes())) + '\n') nx.write_edgelist(F, f2, data=False) f2.close() #os.system("./Kavosh -i output.txt -r 1000 -s 3") #nx.draw(F) #plt.show()
def run(filename, gt_file, n_iter):
    """Condense a read graph and write it with chromosome/alignment labels.

    Steps, in order:
    1. Load the graph with ``input1`` or ``input2``; ``input2`` is used when
       the first line of *filename* has exactly two whitespace-separated
       fields.
    2. Read *gt_file* as whitespace-separated integers; the second column of
       line ``i`` (0-based) is the chromosome of read id ``i``.
    3. Set ``count``, ``chr`` and ``read`` on every node.  Node names are
       expected to look like ``<int>_<suffix>``, the integer being the read
       id.
    4. Repeat ``n_iter`` times: remove nodes with in-degree 0.
    5. Call ``merge_simple_path`` five times.
    6. Copy nodes/edges and the attributes above into a plain ``DiGraph``.
    7. Best effort: add ``aln_start`` / ``aln_end`` / ``aln_strand`` from
       ``<prefix>.mapping.json`` (zeros for nodes without a usable entry).
    8. Write ``<prefix>_condensed.graphml`` and print component counts.

    ``<prefix>`` is the part of *filename* before its first ``'.'``.
    """
    prefix = filename.split('.')[0]

    def show_degree_counts():
        # Histogram of node degrees, printed after every pruning/merging step.
        print(Counter(sorted(nx.degree(g).values(), reverse=True)))

    with open(filename) as f:
        line1 = f.readline()
    print(line1)

    if len(line1.split()) != 2:
        g = input1(filename)
    else:
        g = input2(filename)

    mapping_dict = {}
    with open(gt_file, 'r') as f:
        for num, line in enumerate(f):
            m = [int(tok) for tok in line.split()]
            mapping_dict[num] = m[1]

    print(nx.info(g))

    for node in g.nodes():
        nodeid = int(node.split('_')[0])
        g.node[node]['count'] = 1
        g.node[node]['chr'] = mapping_dict[nodeid]
        g.node[node]['read'] = node

    show_degree_counts()

    for _ in range(n_iter):
        # Snapshot the node list: the graph is mutated inside the loop.
        for node in list(g.nodes()):
            if g.in_degree(node) == 0:
                g.remove_node(node)
        print(nx.info(g))
        show_degree_counts()

    show_degree_counts()

    # Read by merge_simple_path -- presumably a counter/id seed; TODO confirm.
    g.graph['aval'] = 1000000000

    for _ in range(5):
        merge_simple_path(g)
        show_degree_counts()

    h = nx.DiGraph()
    h.add_nodes_from(g)
    h.add_edges_from(g.edges())
    for node in g.nodes():
        h.node[node]['count'] = g.node[node]['count']
        h.node[node]['chr'] = g.node[node]['chr']
        try:
            h.node[node]['read'] = g.node[node]['read']
        except KeyError:
            # 'read' is optional on nodes at this point.
            pass

    # Alignment annotation is optional: ujson or the mapping file may be
    # absent.
    try:
        import ujson
        with open(prefix + '.mapping.json') as mapping_file:
            mapping = ujson.load(mapping_file)
    except (ImportError, IOError, ValueError) as err:
        print('mapping not loaded, skipping alignment annotation: %s' % err)
    else:
        print('get mapping')
        for node in h.nodes():
            try:
                entry = mapping[node]
                aln = (entry[0], entry[1], entry[2])
            except (KeyError, IndexError, TypeError):
                aln = (0, 0, 0)
            h.node[node]['aln_start'] = aln[0]
            h.node[node]['aln_end'] = aln[1]
            h.node[node]['aln_strand'] = aln[2]

    nx.write_graphml(h, prefix + '_condensed.graphml')

    print(nx.number_weakly_connected_components(h))
    print(nx.number_strongly_connected_components(h))
def test_number_weakly_connected_components(self):
    """Weak component count of a digraph equals the component count of its undirected copy."""
    for digraph, _ in self.gc:
        undirected = digraph.to_undirected()
        weak_count = nx.number_weakly_connected_components(digraph)
        plain_count = nx.number_connected_components(undirected)
        assert_equal(weak_count, plain_count)
# Layout is computed before isolated nodes are dropped from G.
pos = nx.spring_layout(G, k=0.15, iterations=10)
# pos=nx.graphviz_layout(G)
# pos=layout(G)
G.remove_nodes_from(nx.isolates(G))

# Report header.
print(" ")
print('WEAK CONNECTEDNESS OF DIRECTED GRAPHS')
print(" ")
print(" ")
print(G.name)
print(" ")

# Weak connectivity summary.
print('%s %s' % ('Is graph G weakly connected?', nx.is_weakly_connected(G)))
print('%s %s' % ('The number of weakly connected components of G is:',
                 nx.number_weakly_connected_components(G)))
print(" ")

# Components, largest first.
lc = sorted(nx.weakly_connected_components(G), key=len, reverse=True)
print('List of weakly connected components:')
# print sorted(nx.weakly_connected_components(G), key = len, reverse=True)
print(lc)
print(" ")

# Pick a random node among those with positive degree.
deg = G.degree()
deg_dic = [nd for nd in deg if deg[nd] > 0]
node0 = random.choice(deg_dic)
def compute_quota(G, gg, date, windowsize, topic, all_uid_pr, network_type):
    """Compute network statistics for one topic window and store them.

    Every statistic is stored through ``save_quota`` under the key
    ``<topic>_<date>_<windowsize>_<metric>_<network_type>``.

    Args:
        G: directed graph (in/out degree and strongly/weakly connected
            component functions are applied to it).
        gg: graph handed to ``nx.connected_component_subgraphs`` and
            ``nx.average_clustering`` -- presumably the undirected version
            of ``G``; confirm with the caller.
        date, windowsize, topic: used to build the key prefix and passed on
            to ``get_key_user``.
        all_uid_pr: not used by this function.
        network_type: appended (via ``str``) to every stored key.

    Raises:
        ValueError: if ``gg`` has no connected component (empty graph).
    """
    prekey = _utf8_unicode(topic) + '_' + str(date) + '_' + str(windowsize)

    def save(metric, value):
        # Store `value` under the key for `metric` in this topic window.
        save_quota(prekey + metric + str(network_type), value)

    print('%s %s' % ('G_edges:', len(G.edges())))

    print('degree_counter')
    # Result unused; the call is kept in case get_counter has side effects
    # -- TODO confirm and drop if it is pure.
    get_counter(G.degree())

    indegree_histogram = get_counter(G.in_degree())
    save('_indegree_histogram_', json.dumps(indegree_histogram))

    outdegree_histogram = get_counter(G.out_degree())
    save('_outdegree_histogram_', json.dumps(outdegree_histogram))

    # Largest connected component of gg (the first one wins on ties).
    H = max(nx.connected_component_subgraphs(gg), key=len)

    # Degree centrality, dict {node: value}.
    dCentrality = nx.degree_centrality(G)
    save('_ave_degree_centrality_', get_ave(dCentrality))

    # Betweenness centrality, dict {node: value}.
    bCentrality = nx.betweenness_centrality(G)
    save('_ave_betweenness_centrality_', get_ave(bCentrality))

    # Closeness centrality, dict {node: value}.
    cCentrality = nx.closeness_centrality(G)
    save('_ave_closeness_centrality_', get_ave(cCentrality))

    # Key users derived from the three centrality measures.
    print('get_user')
    get_key_user(topic, date, windowsize, dCentrality, bCentrality,
                 cCentrality, network_type)

    # Path-length statistics are only defined on a connected graph, hence H.
    spl_histogram = get_spl_histogram(H)
    save('_shortest_path_length_histogram_', json.dumps(spl_histogram))

    avespl = nx.average_shortest_path_length(H)
    save('_average_shortest_path_length_', avespl)

    # Degree distribution: dhistogram[d] is the number of nodes of degree d,
    # starting at degree 0.
    dhistogram = nx.degree_histogram(G)
    save('_degree_histogram_', dhistogram)

    # Power-law exponent fitted to the degree distribution.
    gamma = get_powerlaw(dhistogram, prekey)
    save('_power_law_distribution_', gamma)

    nnodes = len(G.nodes())
    save('_number_nodes_', nnodes)

    ratio_H2G = float(len(H.nodes())) / float(nnodes)
    print('%s %s' % ('!!!!!ratio_H2G!!!!!:', ratio_H2G))

    # Average node degree.  The histogram holds node counts indexed by
    # degree, so the total degree is sum(degree * count); summing the counts
    # alone just gives the number of nodes and an average of 1.0.
    alldegree = sum(deg * cnt for deg, cnt in enumerate(dhistogram))
    ave_degree = float(alldegree) / float(nnodes)
    save('_ave_degree_', ave_degree)

    nedges = len(G.edges())
    save('_number_edges_', nedges)

    # Diameter = maximum eccentricity, again on the connected component H.
    gdiameter = nx.diameter(H)
    save('_diameter_', gdiameter)

    geccentricity = nx.eccentricity(H)
    save('_ave_eccentricity_', get_ave(geccentricity))

    # Number of strongly connected components.
    sconnectedn = nx.number_strongly_connected_components(G)
    save('_number_strongly_connected_components_', sconnectedn)

    # Number of weakly connected components.
    wconnectesn = nx.number_weakly_connected_components(G)
    save('_number_weakly_connected_components_', wconnectesn)

    # Largest weakly connected component.  The generator yields components
    # in no particular size order, so pick the maximum explicitly.
    maxwcc = max(nx.weakly_connected_component_subgraphs(G), key=len)
    print('%s %s' % ('maxwcc_G:', len(maxwcc.nodes())))
    print('%s %s' % ('!!!!ratio_maxwcc_G!!!:',
                     float(len(maxwcc.nodes())) / float(nnodes)))

    # Average clustering coefficient.
    aveclustering = nx.average_clustering(gg)
    save('_average_clustering_', aveclustering)

    # Degree assortativity coefficient.
    dassortativity_coefficient = nx.degree_assortativity_coefficient(G)
    save('_degree_assortativity_coefficient_', dassortativity_coefficient)

    # Maximum k-core number over all nodes.
    kcore = nx.core_number(G)
    save('_max_k_core_', get_max(kcore))