Exemple #1
0
def read_graph(filename,gt_file):
    """Build a directed graph from an edge list, annotate it and save it as GraphML.

    Each non-blank line of ``filename`` is split on whitespace and must hold
    at least five fields: ``id_a id_b suffix_a suffix_b active_flag``.  The
    two nodes are named ``id_a + "_" + suffix_a`` and ``id_b + "_" + suffix_b``
    and an edge is added from the first to the second.  The first node gets
    ``active=1``; the second gets ``active=int(active_flag)``.

    ``gt_file`` is a JSON file mapping an id to a list whose first entry
    starts with two values; their minimum / maximum are stored on the node as
    ``aln_start`` / ``aln_end``.  Ids missing from the mapping get 0 for both.

    The graph is written to ``filename.split('.')[0] + '_hgraph.graphml'`` and
    the numbers of weakly and strongly connected components are printed.
    Note that ``split('.')[0]`` cuts at the first dot anywhere in the path.
    """
    with open(gt_file) as f:
        read_dict = json.load(f)

    def alignment_span(read_id):
        """Return (start, end) from the first ground-truth entry, or (0, 0)."""
        if read_id not in read_dict:
            return 0, 0
        first, second = read_dict[read_id][0][0], read_dict[read_id][0][1]
        return min(first, second), max(first, second)

    g = nx.DiGraph()

    with open(filename) as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue

            src = fields[0] + "_" + fields[2]
            dst = fields[1] + "_" + fields[3]
            src_start, src_end = alignment_span(fields[0])
            dst_start, dst_end = alignment_span(fields[1])

            # Attributes go through add_node() rather than the ``g.node``
            # mapping, which newer networkx releases no longer provide.
            # Calling add_node() on an existing node only updates attributes.
            g.add_node(src, aln_start=src_start, aln_end=src_end, active=1)
            g.add_node(dst, aln_start=dst_start, aln_end=dst_end,
                       active=int(fields[4]))
            g.add_edge(src, dst)

    nx.write_graphml(g, filename.split('.')[0]+'_hgraph.graphml')

    # Single-argument print() behaves the same on Python 2 and Python 3.
    print(nx.number_weakly_connected_components(g))
    print(nx.number_strongly_connected_components(g))
Exemple #2
0
    def main(self):
        """Load the medium and large networks and print their component statistics.

        Prints the number of weakly and strongly connected components of each
        network, then the node and edge counts of each network's largest
        strongly connected component.
        """

        def largest_scc(graph):
            """Return the subgraph induced by the largest strongly connected component."""
            # strongly_connected_component_subgraphs() no longer exists in
            # current networkx; taking the largest node set and inducing the
            # subgraph gives the same graph (ties resolve to the first one).
            return graph.subgraph(max(nx.strongly_connected_components(graph), key=len))

        # Load the data
        data = dataLoader.DataLoader()
        medium = data.load_medium()
        large = data.load_large()

        # Send it to the opener
        med = self.opener(medium)
        lg = self.opener(large)

        # Compute each largest SCC once instead of once per printed figure.
        med_scc = largest_scc(med)
        lg_scc = largest_scc(lg)

        # Print the results
        print("Q2.4 How many weakly connected components and how many strongly connected components does this network have? How many nodes and links are in the largest strongly connected component of this graph?\n")

        print("Number of weakly connected components Medium: " + str(nx.number_weakly_connected_components(med)))
        print("Number of weakly connected components Large: " + str(nx.number_weakly_connected_components(lg)))
        print("\n")
        print("Number of strongly connected components Medium: " +  str(nx.number_strongly_connected_components(med)))
        print("Number of strongly connected components Large: " + str(nx.number_strongly_connected_components(lg)))
        print("\n")
        print("How many nodes are in the largest strongly connected component?")
        print("Medium Network: " + str(nx.number_of_nodes(med_scc)))
        print("Large Network: " + str(nx.number_of_nodes(lg_scc)))
        print("\n")
        print("How many edges are in the largest strongly connected component?")
        print("Medium Network: " + str(nx.number_of_edges(med_scc)))
        print("Large Network: " + str(nx.number_of_edges(lg_scc)))
Exemple #3
0
def save_network_statistics(g):
    """Compute summary statistics of ``g`` and write them to a text file.

    The statistics (component counts, node/edge counts, density, average
    clustering coefficient, average degree, transitivity and diameter) are
    written as ``name: value`` lines to
    ``./network-statistics/twitter-combined-statistics.txt``.

    Statistics that networkx cannot compute for the given graph are stored
    as ``None`` instead of aborting the whole report.
    """
    stats = {}
    stats['num_weakly_connected_components'] = (
        nx.number_weakly_connected_components(g))
    stats['num_strongly_connected_components'] = (
        nx.number_strongly_connected_components(g))
    stats['num_nodes'] = nx.number_of_nodes(g)
    stats['num_edges'] = nx.number_of_edges(g)
    stats['density'] = nx.density(g)
    try:
        stats['avg_clustering_coef'] = nx.average_clustering(g)
    except Exception:  # best effort, but let KeyboardInterrupt/SystemExit through
        stats['avg_clustering_coef'] = None  # not defined for directed graphs
    if stats['num_nodes']:
        # dict(...) accepts both the plain dict returned by older networkx
        # and the degree view returned by newer releases.
        total_degree = sum(dict(g.degree()).values())
        stats['avg_degree'] = total_degree / float(stats['num_nodes'])
    else:
        stats['avg_degree'] = None  # no nodes: average degree is undefined
    stats['transitivity'] = nx.transitivity(g)
    try:
        stats['diameter'] = nx.diameter(g)
    except Exception:
        # unconnected --> infinite path length between connected components
        stats['diameter'] = None

    # Text mode: the values written are str, which a binary-mode file
    # rejects on Python 3.
    with open('./network-statistics/twitter-combined-statistics.txt',
              'w') as f:
        for stat_name, stat_value in stats.items():
            f.write(stat_name + ': ' + str(stat_value) + '\n')
Exemple #4
0
def main():
    """Collect tweets for the "#cusec" hashtag, build the tweeter network and export it.

    Search result pages 1 through 7 are fetched (``per_page=100``), the
    screen names of the tweet authors are de-duplicated with ``unique`` and
    passed to ``twitter_network``.  The three graphs it returns are written
    as GraphML files under ``data/`` and a short summary is printed.
    """
    # Create authenticated API (the credentials are empty strings in this source)
    credentials = dict(
        consumer_key="",
        consumer_secret="",
        access_token_key="",
        access_token_secret="",
    )
    api = twitter.Api(**credentials)
    hashtag = "#cusec"

    # Get all of the CUSEC data
    cusec = []
    for page in range(1, 8):
        cusec += api.GetSearch(hashtag, per_page=100, page=page)
    print("Number of searches returned: {}".format(len(cusec)))

    # List of all CUSEC tweeters
    cusec_tweeters = unique(map(lambda status: status.user.screen_name, cusec))
    print("Number of people Tweeting {}: {}".format(hashtag, len(cusec_tweeters)))

    # Create network
    cusec_network, cusec_mc, cusec_subgraph = twitter_network(
        cusec_tweeters, api, user_type="cusec")

    # Export networks
    exports = (
        (cusec_network, "data/cusec_network.graphml"),
        (cusec_mc, "data/cusec_main_comp.graphml"),
        (cusec_subgraph, "data/cusec_users_subgraph.graphml"),
    )
    for graph, path in exports:
        nx.write_graphml(graph, path)

    print(nx.info(cusec_network))
    weak_count = nx.number_weakly_connected_components(cusec_network)
    print("Number of weakly connected components: {}".format(weak_count))
Exemple #5
0
def save_network_statistics(g):
    """Compute summary statistics of ``g`` and write them to a text file.

    The statistics (component counts, node/edge counts, density, average
    clustering coefficient, average degree, transitivity and diameter) are
    written as ``name: value`` lines to
    ``D:\\fingerprint-statistics-undirected.txt``.

    Statistics that networkx cannot compute for the given graph are stored
    as ``None`` instead of aborting the whole report.
    """
    # g.degree iterates as (node, degree) pairs; keep only the degrees.
    node_degrees = [degree for _, degree in g.degree]

    stats = {}
    stats['num_weakly_connected_components'] = (
        nx.number_weakly_connected_components(g))
    stats['num_strongly_connected_components'] = (
        nx.number_strongly_connected_components(g))
    stats['num_nodes'] = nx.number_of_nodes(g)
    stats['num_edges'] = nx.number_of_edges(g)
    stats['density'] = nx.density(g)
    try:
        stats['avg_clustering_coef'] = nx.average_clustering(g)
    except Exception:  # best effort, but let KeyboardInterrupt/SystemExit through
        stats['avg_clustering_coef'] = None  # not defined for directed graphs
    if stats['num_nodes']:
        stats['avg_degree'] = sum(node_degrees) / float(stats['num_nodes'])
    else:
        stats['avg_degree'] = None  # no nodes: average degree is undefined
    stats['transitivity'] = nx.transitivity(g)
    try:
        stats['diameter'] = nx.diameter(g)
    except Exception:
        # unconnected --> infinite path length between connected components
        stats['diameter'] = None

    with open('D:\\fingerprint-statistics-undirected.txt', 'w') as f:
        for stat_name, stat_value in stats.items():
            f.write(stat_name + ': ' + str(stat_value) + '\n')
def analyze_connected_components(graph, data, filename, clusters=None):
    """Print a LaTeX-flavoured report on the weakly connected components of ``graph``.

    Prints the node count, the component count and the size of the largest
    component.  Then, for every component that has at least three nodes and
    whose undirected version is not complete, prints its index, its
    ``cluster_quality`` score, an ``\\includegraphics`` snippet pointing at
    ``./plots/analyze_streaming_alg/<filename>/<index>.png`` and one text line
    per node.  When ``clusters`` is given, only component indices contained in
    it are reported.
    """
    print('Nodes:', graph.number_of_nodes())
    print("Components:", nx.number_weakly_connected_components(graph), '\n')

    # Size of the largest component.
    print(max(len(members) for members in nx.weakly_connected_components(graph)))

    for index, members in enumerate(nx.weakly_connected_components(graph)):
        if clusters is not None and index not in clusters:
            continue
        # Skip the cluster if it is small or complete (for now).
        if len(members) < 3:
            continue
        undirected = nx.Graph(graph.subgraph(members))
        if nx.complement(undirected).number_of_edges() == 0:
            # An empty complement means every pair of nodes is connected.
            continue

        quality = cluster_quality(graph.subgraph(members), data)
        print('meta-cluster id:', index, 'quality:', quality)
        print(r'\begin{center}')
        print(r'\includegraphics[width=4in]{./plots/analyze_streaming_alg/' +
              filename + '/' + str(index) + '.png}')
        print(r'\end{center}')
        for node in members:
            contained_ids = graph.nodes[node]['contains']
            matching_rows = data.data.loc[data.data['ad_id'].isin(contained_ids)]
            # Only the first matching description is shown for each node.
            first_description = matching_rows['u_Description'].iloc[0]
            print(node, remove_punctuation(first_description), '\n')
        print(r'\newpage ')
Exemple #7
0
def get_graph_statistics(DG,category,kcore):
	"""Print component / degree / weight statistics of ``DG`` and plot two histograms.

	``category`` (a space-separated name) and ``kcore`` are only used to build
	the output file names: the words of ``category`` are joined with ``-`` and
	``kcore`` is appended before the ``.eps`` extension.  The in-degree
	histogram goes to ``plots/category_in_degree/`` and the edge-weight
	histogram to ``plots/category_edge_weight/``.  A histogram is skipped when
	its list of values is empty.
	"""

	print('Graph statistics for category:',category)

	# number of connected components
	num_connected_components = nx.number_weakly_connected_components(DG)
	print('Number of weakly connected components:',num_connected_components)
	print()
	category_name_list = category.split(' ')

	# in-degree distribution
	print('In-degree distribution.')
	filename = "-".join(category_name_list)+'-in-degree-hist-'+str(kcore)+'.eps'
	# dict(...) accepts both the plain dict returned by older networkx and the
	# degree view returned by newer releases; list(...) gives a real sequence.
	indegree_list = list(dict(DG.in_degree()).values())
	if indegree_list:
		print('Max value of in-degree',max(indegree_list))
		print()
		histogram(indegree_list,'plots/category_in_degree/'+filename,'in-degree','# nodes','Distribution of in-degree per category',100,None,1,500,True)

	# edge weight distribution
	print('Edge weight distribution.')
	filename = "-".join(category_name_list)+'-edge-weight-hist-'+str(kcore)+'.eps'
	# edges(data=..., default=...) replaces edges_iter(), which newer networkx
	# no longer provides; edges without a 'weight' attribute count as 1.
	weight_list = [e[2] for e in DG.edges(data='weight', default=1)]
	if weight_list:
		print('Max value of weight edges',max(weight_list))
		print()
		histogram(weight_list,'plots/category_edge_weight/'+filename,'edge weight','# edges','Distribution of edge weights per category',60,None,1,500,True)
def answer_four():
    """Return the number of nodes in the first weakly connected component.

    The graph comes from ``answer_one()``.  "First" means the first component
    yielded by ``nx.weakly_connected_components``.
    NOTE(review): that is not guaranteed to be the largest component; confirm
    which one is wanted.
    """
    G = answer_one()
    # The component count computed here previously was never used and cost
    # an extra traversal of the graph, so it has been dropped.
    weak_components = list(nx.weakly_connected_components(G))
    return len(weak_components[0])
Exemple #9
0
    def component(self):
        """Write connectivity facts about ``self.graph`` to ``<self.DIR>/component.json``.

        When ``self.directed == 'directed'`` the JSON object holds the
        strong / semi / weak connectivity flags, the component counts and,
        for every strongly and weakly connected component, one
        representative node (the first node obtained when listing the
        component).  For any other value of ``self.directed`` an empty
        object is written.  The output path is printed at the end.
        """
        rslt = {}
        if self.directed == 'directed':
            graph = self.graph

            rslt['is_strongly_connected'] = nx.is_strongly_connected(graph)
            rslt['strongly_connected'] = [
                list(members)[0]
                for members in nx.strongly_connected_components(graph)
            ]
            rslt['number_strongly_connected_components'] = (
                nx.number_strongly_connected_components(graph))
            rslt['is_semiconnected'] = nx.is_semiconnected(graph)

            # NOTE: the key spelling below is part of the emitted JSON and is
            # kept exactly as it is.
            rslt['wealy_connected'] = [
                list(members)[0]
                for members in nx.weakly_connected_components(graph)
            ]
            rslt['is_weakly_connected'] = nx.is_weakly_connected(graph)
            rslt['number_weakly_connected_components'] = (
                nx.number_weakly_connected_components(graph))

        fname_component = self.DIR + '/component.json'
        with open(fname_component, "w") as out:
            json.dump(rslt, out, cls=SetEncoder, indent=2)
        print(fname_component)
Exemple #10
0
def component_stats(G, verbose):
	"""Prints out various relevant stats about graphs concerning components.

	Parameters
	----------
	G : networkx.DiGraph
	verbose : bool
	    Set to True if you want explanations of stats

	Returns
	-------
	None

	Note: Writes to terminal.
	"""

	explans = {}
	if verbose:
		explans['weakly-connected'] = "(There is an undirected path between each pair of nodes in the directed graph)"
		explans['strongly-connected'] = "(There is a directed path between each pair of nodes in the directed graph)"
		explans['semiconnected'] = ""
	else:
		explans['weakly-connected'] = ""
		explans['strongly-connected'] = ""
		explans['semiconnected'] = ""

	# Single-argument print() behaves the same on Python 2 and Python 3.
	print("Is the graph weakly connected "+explans['weakly-connected'] +"? "+ str(nx.is_weakly_connected(G)))
	print("Number of weakly connected components: " + str(nx.number_weakly_connected_components(G)))
	print("Is the graph semiconnected "+explans['semiconnected']+ "? " + str(nx.is_semiconnected(G)))
	print("Is the graph strongly connected "+explans['strongly-connected']+ "? "+ str(nx.is_strongly_connected(G)))
def graph_structure_info(G, filename, df):
    """Append one row of summary statistics for ``G`` to ``df`` and return the result.

    The new row holds ``filename`` (in the ``date`` column), the node and
    edge counts, the density, the numbers of weakly and strongly connected
    components, the sum of all edge ``weight`` attributes and the mean
    in-degree.  ``df`` itself is not modified; the combined frame is returned.
    """
    #degree_out  = pd.DataFrame([d for n,d in G.out_degree()]) # out-degree of every node
    degree_in = pd.DataFrame([d for n, d in G.in_degree()])  # in-degree of every node
    df1 = pd.DataFrame(
        data={
            'date': [filename],
            'nodes_num': [G.number_of_nodes()],
            'edges_num': [G.number_of_edges()],
            'density': [nx.density(G)],
            'number_weakly_connected_components':
            [nx.number_weakly_connected_components(G)],
            'number_strongly_connected_components':
            [nx.number_strongly_connected_components(G)],
            #'size_largest_strongly_connected_components':[len(max(nx.strongly_connected_components(G), key=len))],
            #'size_largest_weakly_connected_components':[len(max(nx.weakly_connected_components(G), key=len))],
            'weights_sum':
            [sum(list((nx.get_edge_attributes(G, 'weight')).values()))],
            #'ave_degree_out':[degree_out.mean()], # mean in-degree == mean out-degree
            # NOTE(review): DataFrame.mean() returns a Series, so this cell
            # holds a Series rather than a scalar; confirm this is intended.
            'ave_degree': [degree_in.mean()]
        })
    #'ave_clustering_coeffient':[nx.average_clustering(G)],
    #'ave_shortest_path_length':[nx.average_shortest_path_length(G)]
    # DataFrame.append() no longer exists in pandas 2; concat() is the
    # equivalent and is also available in older releases.
    df = pd.concat([df, df1], sort=False)
    print("this year graph_basic_info is done")
    return df
Exemple #12
0
def plot_num_components_directed(G_times, fname):
	"""Plot the strongly / weakly connected component counts of each graph in ``G_times``.

	One point is drawn per graph, indexed 0..len(G_times)-1 on the x axis.
	Outlier positions returned by ``find_rarity_windowed_outlier`` for the
	weakly connected counts are marked with dotted vertical lines.  The
	figure is saved to ``fname + 'components.pdf'``.

	Returns the outlier list, sorted in place.
	"""
	timeline = list(range(len(G_times)))
	num_strong = [nx.number_strongly_connected_components(snapshot) for snapshot in G_times]
	num_weak = [nx.number_weakly_connected_components(snapshot) for snapshot in G_times]

	plt.rcParams.update({'figure.autolayout': True})
	for axis_name in ('xtick', 'ytick'):
		plt.rc(axis_name, labelsize='x-small')
	fig = plt.figure(figsize=(4, 2))
	ax = fig.add_subplot(1, 1, 1)
	shared_style = dict(ls='solid', linewidth=0.5, markersize=1)
	ax.plot(timeline, num_strong, marker="P", color='#ffa600', label="strongly", **shared_style)
	ax.plot(timeline, num_weak, marker="h", color='#003f5c', label="weakly", **shared_style)
	ax.set_xlabel('time', fontsize=8)

	# Outliers are detected on the weakly connected series only.
	outliers = find_rarity_windowed_outlier(num_weak)
	outliers.sort()
	for position in outliers:
		plt.axvline(x=position, color='k', linestyle=":", linewidth=0.5)

	ax.set_ylabel('number of connected components', fontsize=8)
	plt.title("number of connected components over time", fontsize='x-small')
	plt.legend(fontsize=5)
	plt.savefig(fname+'components.pdf', pad_inches=0)

	return outliers
Exemple #13
0
def info_list(graph):
    """Returns useful information about the graph as a list of tuples

    The list always holds the node, edge, citation and author counts, the
    network density and the number of weakly connected components.  The
    average degree is added when the graph has at least one node, and the
    number of compilation warnings when ``graph.warnings`` is non-empty.

    :param pybel.BELGraph graph: A BEL graph
    :rtype: list
    """
    number_nodes = graph.number_of_nodes()
    number_edges = graph.number_of_edges()
    result = [
        ('Nodes', number_nodes),
        ('Edges', number_edges),
        ('Citations', count_unique_citations(graph)),
        ('Authors', count_unique_authors(graph)),
        ('Network density', nx.density(graph)),
        ('Components', nx.number_weakly_connected_components(graph)),
    ]

    try:
        # The in-degrees of a directed graph sum to its edge count, so this is
        # the same value as before without calling in_degree().values(), which
        # fails on networkx releases that return a degree view.
        result.append(('Average degree', number_edges / float(number_nodes)))
    except ZeroDivisionError:
        log.info('Graph has no nodes')

    if graph.warnings:
        result.append(('Compilation warnings', len(graph.warnings)))

    return result
Exemple #14
0
def iterative_graph(monostrings,
                    min_k,
                    max_k,
                    outdir,
                    min_mult=5,
                    step=1,
                    starting_graph=None,
                    verbose=True):
    """Build one collapsed de Bruijn graph per k, feeding contigs forward as extra input.

    For every ``k`` in ``range(min_k, max_k + 1, step)`` the frequent k-mers
    of the current input strings (plus the k-mers obtained from complex nodes
    of the previous graph) are assembled into a ``DeBruijnGraph``, the
    non-branching paths are collapsed and the graph is written to
    ``<outdir>/db_k<k>.dot``.  The contigs of that graph are then added
    ``min_mult`` times each to the original strings to form the input of the
    next iteration.  ``starting_graph``, when given, seeds the first iteration
    the same way.

    Returns ``(all_contigs, dbs, all_frequent_kmers,
    all_frequent_kmers_read_pos)``, each a dict keyed by ``k``.
    """
    smart_makedirs(outdir)
    dbs, all_contigs = {}, {}
    all_frequent_kmers, all_frequent_kmers_read_pos = {}, {}
    strings = {name: ''.join(ms.string) for name, ms in monostrings.items()}

    def strings_with_contigs(contigs, k):
        """Copy ``strings`` and add every contig ``min_mult`` times under unique keys."""
        augmented = strings.copy()
        for i in range(len(contigs)):
            for j in range(min_mult):
                augmented[f'contig_k{k}_i{i}_j{j}'] = contigs[i]
        return augmented

    if starting_graph is None:
        input_strings = strings.copy()
        complex_kp1mers = {}
    else:
        seed_contigs, _ = starting_graph.get_contigs()
        input_strings = strings_with_contigs(seed_contigs, min_k)
        complex_kp1mers = get_paths_thru_complex_nodes(starting_graph, strings)

    for k in range(min_k, max_k + 1, step):
        frequent_kmers, frequent_kmers_read_pos = get_frequent_kmers(
            input_strings, k=k, min_mult=min_mult)
        frequent_kmers.update(complex_kp1mers)
        if verbose:
            print(f'\nk={k}')
            print(f'#frequent kmers = {len(frequent_kmers)}')
        all_frequent_kmers[k] = frequent_kmers
        all_frequent_kmers_read_pos[k] = frequent_kmers_read_pos

        db = DeBruijnGraph(k=k)
        db.add_kmers(frequent_kmers, coverage=frequent_kmers)
        db.collapse_nonbranching_paths()

        if verbose:
            # Report the component sizes whenever the graph is disconnected.
            component_count = nx.number_weakly_connected_components(db.graph)
            if component_count > 1:
                print(f'#cc = {component_count}')
                for cc in nx.weakly_connected_components(db.graph):
                    print(len(cc))
        dbs[k] = db

        dot_file = os.path.join(outdir, f'db_k{k}.dot')
        nx.drawing.nx_pydot.write_dot(db.graph, dot_file)

        contigs, _ = db.get_contigs()
        all_contigs[k] = contigs

        # The next iteration starts from the original strings plus this
        # iteration's contigs.
        input_strings = strings_with_contigs(contigs, k)
        complex_kp1mers = get_paths_thru_complex_nodes(db, strings)

    return all_contigs, dbs, all_frequent_kmers, all_frequent_kmers_read_pos
Exemple #15
0
def DNetworkSummary(D, qtr, filename=None):
    """Just outputs and prints a quick table of DIRECTED network statistics.

    The table has one ``Description`` / ``Result`` row for: the quarter label
    ``qtr``, the node count, the unweighted edge count, the edge totals
    weighted by the ``calls``, ``min``, ``sms`` and ``mms`` attributes, and the
    number and relative largest size of the strongly and weakly connected
    components.  The table is written to ``filename`` as CSV when a file name
    is given, and is always printed.
    """
    ##  Calculate all the network summary stats
    n = D.number_of_nodes()
    e = D.size()
    e_c = D.size(weight='calls')
    e_min = D.size(weight='min')
    e_sms = D.size(weight='sms')
    e_mms = D.size(weight='mms')
    n_scc = nx.number_strongly_connected_components(D)
    r_scc = relativeLSCCsize(D)
    n_wcc = nx.number_weakly_connected_components(D)
    r_wcc = relativeLWCCsize(D)

    ##  Description vector for printout and output file
    ts = "    "  # just so the output file is a little more readable
    a1 = "Directed Network Statistics -- Quarter "
    a2 = ts + "Number of nodes: "
    a3 = ts + "Number of edges (unweighted): "
    a4 = ts + ts + "Number of edges (weighted by calls): "
    a5 = ts + ts + "Number of edges (weighted by minutes): "
    a6 = ts + ts + "Number of edges (weighted by SMS): "
    a7 = ts + ts + "Number of edges (weighted by MMS): "
    a8 = ts + "Number of Strongly Connected Components (SCC): "
    a9 = ts + ts + "Relative size of largest SCC: "
    a10 = ts + "Number of Weakly Connected Components (WCC): "
    a11 = ts + ts + "Relative size of largest WCC: "
    name = [a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11]
    result = [qtr, n, e, e_c, e_min, e_sms, e_mms, n_scc, r_scc, n_wcc, r_wcc]
    outputresults = pd.DataFrame(data={'Description': name, 'Result': result})
    if filename is not None:
        outputresults.to_csv(filename, index=False)
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print(outputresults)
def draw_graph(nodes, edges, graphs_dir, default_lang='all'):
    """Build a multi-digraph, print its statistics and save it as .dot and .png.

    ``nodes`` is an iterable of node names.  ``edges`` maps ``(source,
    target)`` pairs to a value: a value of 0 adds a plain edge, any other
    value adds an edge carrying it as ``weight`` (float) and ``label`` (str).

    Centrality, assortativity, connectivity and PageRank figures are printed
    to stdout.  The graph is written to
    ``<graphs_dir>/<default_lang>_links.dot`` with the nodes of maximum degree
    centrality given a blue font, and the drawing is saved to
    ``<graphs_dir>/python_<default_lang>_graph.png``.
    """
    lang_graph = nx.MultiDiGraph()
    lang_graph.add_nodes_from(nodes)
    for edge in edges:
        if edges[edge] == 0:
            lang_graph.add_edge(edge[0], edge[1])
        else:
            lang_graph.add_edge(edge[0], edge[1], weight=float(edges[edge]), label=str(edges[edge]))

    # print graph info in stdout
    # degree centrality
    print('-----------------\n\n')
    print(default_lang)
    print(nx.info(lang_graph))
    try:
        # When ties are associated to some positive aspects such as friendship or collaboration,
        #  indegree is often interpreted as a form of popularity, and outdegree as gregariousness.
        DC = nx.degree_centrality(lang_graph)
        max_dc = max(DC.values())
        max_dc_list = [item for item in DC.items() if item[1] == max_dc]
    except ZeroDivisionError:
        max_dc_list = []
    # https://ru.wikipedia.org/wiki/%D0%9A%D0%BE%D0%BC%D0%BF%D0%BB%D0%B5%D0%BA%D1%81%D0%BD%D1%8B%D0%B5_%D1%81%D0%B5%D1%82%D0%B8
    print('maxdc', str(max_dc_list), sep=': ')
    # assortativity coef
    AC = nx.degree_assortativity_coefficient(lang_graph)
    print('AC', str(AC), sep=': ')
    # connectivity
    print("Слабо-связный граф: ", nx.is_weakly_connected(lang_graph))
    print("количество слабосвязанных компонент: ", nx.number_weakly_connected_components(lang_graph))
    print("Сильно-связный граф: ", nx.is_strongly_connected(lang_graph))
    print("количество сильносвязанных компонент: ", nx.number_strongly_connected_components(lang_graph))
    print("рекурсивные? компоненты: ", nx.number_attracting_components(lang_graph))
    print("число вершинной связности: ", nx.node_connectivity(lang_graph))
    print("число рёберной связности: ", nx.edge_connectivity(lang_graph))
    # other info
    print("average degree connectivity: ", nx.average_degree_connectivity(lang_graph))
    print("average neighbor degree: ", sorted(nx.average_neighbor_degree(lang_graph).items(),
                                              key=itemgetter(1), reverse=True))
    # best for small graphs, and our graphs are pretty small
    print("pagerank: ", sorted(nx.pagerank_numpy(lang_graph).items(), key=itemgetter(1), reverse=True))

    plt.figure(figsize=(16.0, 9.0), dpi=80)
    plt.axis('off')
    pos = graphviz_layout(lang_graph)
    nx.draw_networkx_edges(lang_graph, pos, alpha=0.5, arrows=True)
    nx.draw_networkx(lang_graph, pos, node_size=1000, font_size=12, with_labels=True, node_color='green')
    nx.draw_networkx_edge_labels(lang_graph, pos, edges)

    # saving file to draw it with dot-graphviz
    # changing overall graph view, default is top-bottom
    lang_graph.graph['graph'] = {'rankdir': 'LR'}
    # marking with blue nodes with maximum degree centrality
    for max_dc_node in max_dc_list:
        # add_node() on an existing node only updates its attributes; this
        # avoids the ``.node`` mapping, which newer networkx no longer provides.
        lang_graph.add_node(max_dc_node[0], fontcolor='blue')
    write_dot(lang_graph, os.path.join(graphs_dir, default_lang + '_links.dot'))

    plt.savefig(os.path.join(graphs_dir, 'python_' + default_lang + '_graph.png'), dpi=100)
    plt.close()
def _sanity_check(G):
    r"""
    Verify that the input graph consists of exactly one connected component.

    Directed graphs are checked for weak connectivity, undirected graphs for
    ordinary connectivity.

    Parameters
    ----------
    G : graph
       A NetworkX graph

    Raises
    ------
    ValueError
        If the number of (weakly) connected components is not exactly one.
    """
    # Pick the component counter that matches the graph's directedness.
    count_components = (nx.number_weakly_connected_components
                        if G.is_directed()
                        else nx.number_connected_components)
    num_ccs = count_components(G)

    if num_ccs == 1:
        return

    raise ValueError(
        "Input graph should contain one (weakly) connected component. "
        "This graph contains: " + str(num_ccs))
def fill_out_report(*,
                    network: Network,
                    report: Report,
                    graph: Optional[BELGraph] = None) -> None:
    """Populate ``report`` with summary statistics of ``network``.

    When ``graph`` is not supplied it is obtained from ``network.as_bel()``.
    The average degree is the edge count divided by the node count, or 0.0
    for a graph without nodes.  The report is marked as completed last.
    """
    if graph is None:
        graph = network.as_bel()

    number_nodes = graph.number_of_nodes()
    number_edges = graph.number_of_edges()
    # Guard against an empty graph instead of dividing by zero.
    average_degree = number_edges / number_nodes if number_nodes else 0.0

    report.network = network
    report.number_nodes = number_nodes
    report.number_edges = number_edges
    report.number_warnings = graph.number_of_warnings()
    report.number_citations = graph.number_of_citations()
    report.number_authors = graph.number_of_authors()
    report.number_components = nx.number_weakly_connected_components(graph)
    report.network_density = nx.density(graph)
    report.average_degree = average_degree
    report.dump_calculations(graph)
    report.completed = True
Exemple #19
0
def connected_components(network):
    """Print the SCC / WCC counts of ``network`` and the edge count of its largest SCC.

    "Largest" is by number of nodes; ties resolve to the first such component
    yielded by ``nx.strongly_connected_components``.
    """
    n_scc = nx.number_strongly_connected_components(network)
    n_wcc = nx.number_weakly_connected_components(network)

    print("# of strongly connected components: " + str(n_scc))
    print("# of weakly connected components: " + str(n_wcc))

    # strongly_connected_component_subgraphs() no longer exists in current
    # networkx; inducing the subgraph on the largest node set is equivalent.
    largest_scc_nodes = max(nx.strongly_connected_components(network), key=len)
    print(network.subgraph(largest_scc_nodes).number_of_edges())
Exemple #20
0
def print_info(G):
    """Log, at DEBUG level, the edge, node and component counts of ``G``."""
    logging.debug("begin transitiv reduction")

    edge_count = G.number_of_edges()
    logging.debug("nb edge : {}".format(edge_count))

    node_count = G.number_of_nodes()
    logging.debug("nb node : {}".format(node_count))

    strong_count = nx.number_strongly_connected_components(G)
    logging.debug("nb strong components : {}".format(strong_count))

    weak_count = nx.number_weakly_connected_components(G)
    logging.debug("nb weak components : {}".format(weak_count))
def components(graph):
    """Print the component counts of ``graph`` and save its condensation.

    The condensation (as returned by ``nx.condensation``) is written as an
    edge list to ``Datasets/condenced_graph.edgelist``.
    """
    strong_count = nx.number_strongly_connected_components(graph)
    print("Number of strongly connected components", strong_count)

    weak_count = nx.number_weakly_connected_components(graph)
    print("Number of weakly connected components", weak_count)

    nx.write_edgelist(nx.condensation(graph),
                      "Datasets/condenced_graph.edgelist")
Exemple #22
0
def dbg_info(dag, optional_callable=None):
    """Print a debug summary of ``dag`` between two separator lines.

    ``optional_callable``, when truthy, is invoked right after the opening
    separator.  The summary shows the node and edge counts, whether the graph
    is a DAG, the number of weakly connected components and the output of
    ``dbg_pprint_source_sink_types``.
    """
    separator = '-------------------------------------------------------------------'

    print(separator)
    if optional_callable:
        optional_callable()
    counts = (dag.number_of_nodes(), dag.number_of_edges())
    print('Nodes: %d, edges: %d' % counts)
    print('Is DAG?', nx.is_directed_acyclic_graph(dag))
    print('Weakly connected components:',
          nx.number_weakly_connected_components(dag))
    dbg_pprint_source_sink_types(dag)
    print(separator)
Exemple #23
0
def pattern_remove_incomplete_region_or_spatial_path(
    perception_graph: PerceptionGraphPattern
) -> PerceptionGraphPattern:
    """
    Return a copy of the pattern without region / spatial-path nodes that lack
    a reference object.

    A region or path node is kept only if one of its outgoing edges carries a
    `RelationTypeIsPredicate` whose relation type is one of the reference
    object labels.  If removing nodes disconnects the graph, only the weakly
    connected component with the most nodes is kept.
    """
    graph = perception_graph.copy_as_digraph()

    reference_labels = [
        REFERENCE_OBJECT_LABEL,
        REFERENCE_OBJECT_DESTINATION_LABEL,
        REFERENCE_OBJECT_SOURCE_LABEL,
    ]

    def has_reference_edge(node: NodePredicate) -> bool:
        """Tell whether any outgoing edge of *node* points at a reference object."""
        for successor in graph.successors(node):
            predicate = graph.edges[node, successor]["predicate"]
            if (
                isinstance(predicate, RelationTypeIsPredicate)
                and predicate.relation_type in reference_labels
            ):
                return True
        return False

    region_and_path_nodes: ImmutableSet[NodePredicate] = immutableset(
        node
        for node in graph.nodes
        if isinstance(node, (IsPathPredicate, RegionPredicate))
    )
    nodes_without_reference: List[NodePredicate] = [
        node for node in region_and_path_nodes if not has_reference_edge(node)
    ]

    logging.info(
        f"Removing incomplete regions and paths. "
        f"Removing nodes: {nodes_without_reference}"
    )
    graph.remove_nodes_from(nodes_without_reference)

    # We should maybe consider doing this a different way
    # As this approach just brute force solves the problem rather than being methodical about it
    if number_weakly_connected_components(graph) <= 1:
        return PerceptionGraphPattern(graph, dynamic=perception_graph.dynamic)

    # Largest component first; the sort is stable, so equally sized
    # components keep their original relative order.
    components = sorted(
        (subgraph(graph, comp) for comp in weakly_connected_components(graph)),
        key=lambda g: len(g.nodes),
        reverse=True,
    )
    computed_graph = subgraph(graph, components[0].nodes)
    removed_nodes: List[NodePredicate] = []
    for smaller_component in components[1:]:
        removed_nodes.extend(smaller_component.nodes)
    logging.info(f"Cleanup disconnected elements. Removing: {removed_nodes}")

    return PerceptionGraphPattern(computed_graph, dynamic=perception_graph.dynamic)
def write_components_info(G, report_file):
    """Write the component statistics of ``G`` to the open ``report_file``.

    Writes a section header followed by the strongly connected, weakly
    connected and attracting component counts and the semiconnected flag,
    one per line.
    """
    report_file.write("===COMPONENTS_INFO===\n")

    strong_count = nx.number_strongly_connected_components(G)
    report_file.write(
        "Number of strongly connected components: {}\n".format(strong_count))

    weak_count = nx.number_weakly_connected_components(G)
    report_file.write(
        "Number of weakly connected components: {}\n".format(weak_count))

    attracting_count = nx.number_attracting_components(G)
    report_file.write(
        "Number of attractive components: {}\n".format(attracting_count))

    semiconnected = nx.is_semiconnected(G)
    report_file.write("Is semiconnected: {}\n".format(semiconnected))
Exemple #25
0
def get_graph_stats(df):
    """Return ``(nodes, edges, weak components, strong components)`` for an edge table.

    A directed graph is built with one edge per row of ``df``, from the value
    in column ``"src"`` to the value in column ``"trg"``.
    """
    graph = nx.DiGraph()
    graph.add_edges_from(list(zip(df["src"], df["trg"])))

    node_count = graph.number_of_nodes()
    edge_count = graph.number_of_edges()
    weak_count = nx.number_weakly_connected_components(graph)
    strong_count = nx.number_strongly_connected_components(graph)
    return (node_count, edge_count, weak_count, strong_count)
Exemple #26
0
	def build_graph(self):
		"""Build the weighted coach graph ``self.G`` from the two data files.

		``basketball_start_year.csv`` fills ``self.year_map`` (name -> int
		year): its first line is discarded and every other line is stripped of
		newlines and double quotes and split on single spaces into three
		fields, of which the second is the name and the third the year.

		``basketball_playoff_games.txt`` is parsed line by line with
		``parse_tuple``.  Every coach seen gets a running integer index in
		``self.coach_map`` (and the inverse in ``self.reverse_map``).  A game
		contributes to the graph only if both coaches have a start year
		strictly between ``start_year`` and ``end_year`` and the score
		difference is positive.  Its weight ``log(1 + 0.1 * difference)`` is
		put on an edge pointing from the loser to the winner; if an edge in
		the opposite direction already exists, the two are netted against each
		other.

		Finally the node count, edge count and number of weakly connected
		components are printed.
		"""
		self.coach_map = {}
		self.year_map = {}
		self.reverse_map = {}
		self.G = nx.DiGraph()

		with open("basketball_start_year.csv", "r") as yf:
			yf.readline()  # discard the first line
			for line in yf:
				_, k, v = line.replace("\n", "").replace("\"", "").split(" ")
				self.year_map[k] = int(v)

		# The games file is opened in a ``with`` block so that it is closed
		# even when parsing fails part-way through.
		with open("basketball_playoff_games.txt", 'r') as f:
			for line in f:
				winner, winning_score, loser, losing_score = parse_tuple(line)

				# Register unseen coaches, winner first, under the next free index.
				for coach in (winner, loser):
					if coach not in self.coach_map:
						index = len(self.coach_map)
						self.coach_map[coach] = index
						self.reverse_map[index] = coach
				winner_index = self.coach_map[winner]
				loser_index = self.coach_map[loser]

				new_diff = winning_score - losing_score
				if winner not in self.year_map or loser not in self.year_map:
					continue
				if not (start_year < self.year_map[winner] < end_year):
					continue
				if not (start_year < self.year_map[loser] < end_year):
					continue
				if new_diff <= 0:
					continue

				add_weight = math.log(1 + 0.1 * new_diff)
				if self.G.get_edge_data(winner_index, loser_index):
					# An opposite edge exists: net the new weight against it.
					rev_weight = self.G[winner_index][loser_index]['weight']
					if rev_weight > add_weight:
						self.G[winner_index][loser_index]['weight'] -= add_weight
					elif rev_weight == add_weight:
						self.G.remove_edge(winner_index, loser_index)
					else:
						self.G.remove_edge(winner_index, loser_index)
						self.G.add_edge(loser_index, winner_index, weight=(add_weight - rev_weight))
				elif self.G.get_edge_data(loser_index, winner_index):
					self.G[loser_index][winner_index]['weight'] += add_weight
				else:
					self.G.add_edge(loser_index, winner_index, weight=add_weight)

		# Single-argument print() behaves the same on Python 2 and Python 3.
		print(len(self.G.nodes()))
		print(len(self.G.edges()))
		print(nx.number_weakly_connected_components(self.G))
    def load(self) -> None:
        """
        Read the network stored in ``self.networkfile`` with ``np.load``.

        The loaded array is kept in ``self.graph`` and a directed networkx
        graph built from it in ``self.nxgraph``.  ``self.subgraphs`` is set to
        True when that graph has more than one weakly connected component; it
        is left untouched otherwise.
        """
        self.graph = np.load(self.networkfile)
        self.nxgraph = nx.DiGraph(self.graph)

        component_count = nx.number_weakly_connected_components(self.nxgraph)
        is_split = component_count > 1
        if is_split:
            self.subgraphs = True
Exemple #28
0
def read_graph(filename):
    """Build a directed graph from an edge-list file and save it as GraphML.

    Each non-blank line of ``filename`` is split on whitespace and must hold
    at least five fields: ``id_a id_b suffix_a suffix_b active_flag``.  The
    two nodes are named ``id_a + "_" + suffix_a`` and ``id_b + "_" + suffix_b``
    and an edge is added from the first to the second.  The first node gets
    ``active=1``; the second gets ``active=int(active_flag)``.

    The graph is written to ``filename.split('.')[0] + '_hgraph.graphml'`` and
    the numbers of weakly and strongly connected components are printed.
    Note that ``split('.')[0]`` cuts at the first dot anywhere in the path.
    """
    g = nx.DiGraph()

    with open(filename) as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue

            src = fields[0] + "_" + fields[2]
            dst = fields[1] + "_" + fields[3]

            # Attributes go through add_node() rather than the ``g.node``
            # mapping, which newer networkx releases no longer provide.
            # Calling add_node() on an existing node only updates attributes.
            g.add_node(src, active=1)
            g.add_node(dst, active=int(fields[4]))
            g.add_edge(src, dst)

    nx.write_graphml(g, filename.split('.')[0]+'_hgraph.graphml')

    # Single-argument print() behaves the same on Python 2 and Python 3.
    print(nx.number_weakly_connected_components(g))
    print(nx.number_strongly_connected_components(g))
 def validate(self):
     """Return a list of validation errors (empty when none are found).

     The graph ``self.dg`` must form a single weakly connected component;
     after that check, the errors reported by each node's own ``validate()``
     are appended.
     """
     # More than one weakly connected component means disconnected subgraphs.
     is_split = nx.number_weakly_connected_components(self.dg) > 1
     errors = ['no subgraphs are allowed'] if is_split else []
     for node in self.nodes.values():
         errors += node.validate()
     return errors
def output_conectivity_info (graph, path):
    """Write a short connectivity report about *graph* to the file at *path*.

    graph: graph object handed to the networkx weak/strong connectivity
           functions
    path:  (String) location of the report file; it is opened in write mode,
           so existing content is replaced

    The report has a header line followed by four lines: weak connectivity
    flag, weakly connected component count, strong connectivity flag and
    strongly connected component count.  The last line has no trailing
    newline.
    """
    with open(path, 'w') as out:
        out.write('***Conectivity***\n')
        # (line template, measure) pairs, evaluated and written in order.
        report = (
            ('Is weakly connected: %s\n', nx.is_weakly_connected),
            ('Number of weakly connected components: %d\n',
             nx.number_weakly_connected_components),
            ('Is strongly connected: %s\n', nx.is_strongly_connected),
            ('Number of strongly connected components: %d',
             nx.number_strongly_connected_components),
        )
        for template, measure in report:
            out.write(template % measure(graph))
def _connected_components(weighted_projection):
    """Return the number of (weakly) connected components of a projection.

    A ``networkx.DiGraph`` is counted with
    ``nx.number_weakly_connected_components``.  Any other object is treated as
    a graph-tool graph: it is switched to undirected, labelled with
    ``graph_tool.topology.label_components`` and switched back to directed.
    The result is the length of the second value returned by
    ``label_components``.
    """
    if isinstance(weighted_projection, nx.DiGraph):
        return nx.number_weakly_connected_components(weighted_projection)

    # Imported lazily so graph-tool is only needed for graph-tool inputs.
    from graph_tool.topology import label_components

    G = weighted_projection
    G.set_directed(False)
    try:
        _, comps = label_components(G)
    finally:
        # Always restore directedness, even if labelling fails, so the
        # caller's graph is not left silently undirected.
        G.set_directed(True)
    return len(comps)
Exemple #32
0
def dbg_info(dag, optional_callable=None):
    """Print a framed debug summary of *dag*.

    Between two ruler lines this prints, in order: whatever
    *optional_callable* prints (it is called with no arguments when truthy),
    the node and edge counts, whether the graph is a directed acyclic graph,
    the number of weakly connected components, and the output of
    ``dbg_pprint_source_sink_types(dag)``.
    """
    ruler = '-------------------------------------------------------------------'

    print(ruler)
    if optional_callable:
        optional_callable()

    node_count = dag.number_of_nodes()
    edge_count = dag.number_of_edges()
    print('Nodes: %d, edges: %d' % (node_count, edge_count))
    print('Is DAG?', nx.is_directed_acyclic_graph(dag))
    print('Weakly connected components:',
          nx.number_weakly_connected_components(dag))
    dbg_pprint_source_sink_types(dag)
    print(ruler)
Exemple #33
0
def print_stats(graph):
    """Print summary statistics of the mutation graph.

    Reports the number of source nodes (in-degree 0), sink nodes
    (out-degree 0), weakly connected components, and the two values returned
    by ``get_path_stats(graph, sources, sinks)`` as the shortest and longest
    mutation chain.
    """
    sources = []
    for node, degree in graph.in_degree():
        if degree == 0:
            sources.append(node)

    sinks = []
    for node, degree in graph.out_degree():
        if degree == 0:
            sinks.append(node)

    min_len, max_len = get_path_stats(graph, sources, sinks)
    component_count = nx.number_weakly_connected_components(graph)

    # (line template, value) pairs, formatted and printed in this order.
    summary = (
        ('num. source nodes: %d', len(sources)),
        ('num. sink nodes: %d', len(sinks)),
        ('num. connected components: %d', component_count),
        ('shortest mutation chain: %d', min_len),
        ('longest mutation chain: %d', max_len),
    )
    for template, value in summary:
        print(template % value)
Exemple #34
0
 def _describe_list(self) -> List[Tuple[str, float]]:
     """Summarise the graph as ``(label, value)`` pairs, in display order.

     The density entry is pre-formatted as a string in scientific notation
     with two decimals; every other value is whatever the corresponding
     counting call returns.
     """
     summary = []
     summary.append(('Number of Nodes', self.number_of_nodes()))
     summary.append(('Number of Edges', self.number_of_edges()))
     summary.append(('Number of Citations', self.number_of_citations()))
     summary.append(('Number of Authors', self.number_of_authors()))

     density_text = '{:.2E}'.format(nx.density(self))
     summary.append(('Network Density', density_text))

     component_count = nx.number_weakly_connected_components(self)
     summary.append(('Number of Components', component_count))
     summary.append(('Number of Warnings', self.number_of_warnings()))
     return summary
Exemple #35
0
def _hop_count_or_minus_one(graph, origin, destination):
    """Return the shortest-path length from origin to destination, or -1 if none exists."""
    try:
        return nx.shortest_path_length(graph, origin, destination)
    except nx.NetworkXNoPath:
        return -1


def generate_oriented(graph, X):
    """Compute target, source and pair features for every row of *X*.

    Parameters
    ----------
    graph : directed networkx graph (``has_edge``, ``subgraph`` and the
        strongly/weakly connected component counters are used on it).
    X : array whose rows start with ``(target, source)`` node ids; only
        ``X.shape[0]``, ``x[0]`` and ``x[1]`` are read.

    Returns
    -------
    (target_feats, source_feats, edge_feats) : three float arrays with one
        row per row of *X* and 5, 5 and 11 columns respectively.

        * ``target_feats`` / ``source_feats`` hold the first five values
          returned by ``all_oriented_vertex`` for the node.
          # NOTE(review): presumably in-degree, out-degree and three
          # component counts, judging by the names -- confirm against
          # all_oriented_vertex.
        * ``edge_feats`` columns, in order: common in-neighbours, common
          out-neighbours, |out(t) & in(s)|, |out(s) & in(t)|, number of
          edges from a neighbour of s to a neighbour of t, number of edges
          from a neighbour of t to a neighbour of s, strongly and weakly
          connected component counts of the joint neighbourhood subgraph,
          strongly connected component count of the joint "plus"
          neighbourhood subgraph, shortest-path length s->t and t->s
          (-1 when there is no path).

    Progress (row index, total rows, seconds since the previous report) is
    printed every 10000 rows.
    """
    n_pairs = X.shape[0]
    target_feats = np.empty((n_pairs, 5))
    source_feats = np.empty((n_pairs, 5))
    edge_feats = np.empty((n_pairs, 11))
    t1 = time()
    for i, x in enumerate(X):
        t = x[0]
        s = x[1]
        in_d_t, out_d_t, scc_t, wcc_t, sccp_t, n_in_t, n_out_t, n_t, np_t = all_oriented_vertex(
            graph, t)
        in_d_s, out_d_s, scc_s, wcc_s, sccp_s, n_in_s, n_out_s, n_s, np_s = all_oriented_vertex(
            graph, s)

        # Neighbourhood overlaps between the two endpoints.
        com_in = len(set(n_in_t).intersection(n_in_s))
        com_on = len(set(n_out_t).intersection(n_out_s))
        trans_ts = len(set(n_out_t).intersection(n_in_s))
        trans_st = len(set(n_out_s).intersection(n_in_t))

        # Count edges running between the two neighbourhoods, per direction.
        friends_measure_st = 0
        friends_measure_ts = 0
        for ns in n_s:
            for nt in n_t:
                if graph.has_edge(ns, nt):
                    friends_measure_st += 1
                if graph.has_edge(nt, ns):
                    friends_measure_ts += 1

        # Component structure of the joint neighbourhoods.
        sub_nh = graph.subgraph(list(set(n_t).union(n_s)))
        sub_nh_plus = graph.subgraph(list(set(np_t).union(np_s)))
        scc = nx.number_strongly_connected_components(sub_nh)
        wcc = nx.number_weakly_connected_components(sub_nh)
        scc_plus = nx.number_strongly_connected_components(sub_nh_plus)

        # One search per direction: a missing path is reported by the
        # NetworkXNoPath exception instead of a separate has_path() search.
        len_path_st = _hop_count_or_minus_one(graph, s, t)
        len_path_ts = _hop_count_or_minus_one(graph, t, s)

        target_feats[i] = [in_d_t, out_d_t, scc_t, wcc_t, sccp_t]
        source_feats[i] = [in_d_s, out_d_s, scc_s, wcc_s, sccp_s]
        edge_feats[i] = [
            com_in, com_on, trans_ts, trans_st, friends_measure_st,
            friends_measure_ts, scc, wcc, scc_plus, len_path_st, len_path_ts
        ]
        if i % 10000 == 0:
            print(i, n_pairs)
            t2 = time()
            print(t2 - t1)
            t1 = t2
    return target_feats, source_feats, edge_feats
Exemple #36
0
    def update_model(self, edge: Tuple[str, str],
                     manipulation: int,
                     allow_disconnecting: bool = True,
                     allow_cycles: bool = True) -> bool:
        """
        Apply one edge manipulation to ``self.causal_model`` and report whether it succeeded.

        Removing or reversing an edge that does not exist has no effect, and neither has adding an edge that
        already exists; all three return False. A manipulation that is rejected because of
        'allow_disconnecting' or 'allow_cycles' is rolled back so the graph keeps the edges it had before the
        call. Any other manipulation code leaves the graph untouched and returns True.

        :param edge: The edge to be manipulated. e.g. (X0, X1)
        :param manipulation: 0 = remove edge, 1 = add edge, 2 = reverse edge
        :param allow_disconnecting: If true, removals which disconnect the causal graph can be executed.
        :param allow_cycles: If true, additions and reversals which result in a cycle can be executed.
        :return: True if the manipulation was successful. False if it wasn't or it was illegal according to
        'allow_disconnecting' or 'allow_cycles'.
        """
        if manipulation not in (0, 1, 2):
            return True

        model = self.causal_model
        source, target = edge[0], edge[1]

        if manipulation == 0:  # remove edge if it exists
            if not model.has_edge(source, target):
                return False
            model.remove_edge(source, target)

            # Undo the removal when it split the graph and that is not allowed.
            if not allow_disconnecting and nx.number_weakly_connected_components(model) > 1:
                model.add_edge(source, target)
                return False

        elif manipulation == 1:  # add edge unless it is already there
            if model.has_edge(source, target):
                return False
            model.add_edge(source, target)

            # The flag is tested first so the acyclicity check is skipped when cycles are allowed.
            if not allow_cycles and not nx.is_directed_acyclic_graph(model):
                model.remove_edge(source, target)
                return False

        else:  # manipulation == 2: reverse edge if it exists
            if not model.has_edge(source, target):
                return False
            # Remember whether the opposite edge was already present so a rollback does not delete it.
            reverse_existed = model.has_edge(target, source)
            model.remove_edge(source, target)
            model.add_edge(target, source)

            if not allow_cycles and not nx.is_directed_acyclic_graph(model):
                if not reverse_existed:
                    model.remove_edge(target, source)
                model.add_edge(source, target)
                return False

        return True
Exemple #37
0
    def track_df(self, df, identifiers):
        """
        Tracks the objects in df

        Works on a copy of df stored in ``self.df`` and adds the columns
        ``unique_id`` and ``track_id`` to it here.
        # NOTE(review): the ``segment_id`` column is presumably added by the
        # private segment-linking step called below -- confirm there.

        Also sets ``self.G2`` (directed graph built from the adjacency
        matrix), ``self.number_of_tracks``, ``self.tracks`` (sorted object
        indices per track, largest track first) and ``self.tracks_by_label``
        (the label column values of the same objects, in the same order).

        Parameters
        ----------
        df : pd.DataFrame
            a pandas dataframe containing the x, y, and t coordinates
            of objects
        identifiers : list
            list of column names for coordinates in df and label column
            (e.g. ['x_coord', 'y_coord', 'timepoint', 'labels'])
        """
        self.df = df.copy()
        self.identifiers = identifiers
        self.number_of_objects = len(df)
        self.number_of_timepoints = np.max(np.unique(df[identifiers[2]]))
        self.adjacency_matrix = np.zeros(
            [self.number_of_objects, self.number_of_objects])
        # add unique identifiers to df
        self.df['unique_id'] = list(range(0, self.number_of_objects))
        # link timepoints to get segments
        self.__get_track_segments()
        # try to link the segments among themselves
        print('linking track segments across timepoints')
        self.__close_gaps()

        # get the final tracks: every weakly connected component is one track.
        # The components are computed and ordered once and reused below.
        self.G2 = nx.DiGraph(self.adjacency_matrix)
        components = sorted(
            nx.weakly_connected_components(self.G2), key=len, reverse=True)
        self.number_of_tracks = len(components)
        self.tracks = [sorted(component) for component in components]

        labels = self.df[self.identifiers[3]]
        self.tracks_by_label = [
            list(labels.iloc[track]) for track in self.tracks
        ]

        # add column for track ids; each object index belongs to exactly one
        # component, so a direct lookup replaces scanning every track.
        track_of_object = {
            obj: track_id
            for track_id, track in enumerate(self.tracks)
            for obj in track
        }
        self.df['track_id'] = [
            track_of_object[obj] for obj in range(0, self.number_of_objects)
        ]
    def calculate_complexity(self):
        '''
        Return the cyclomatic complexity of ``self.graph`` after McCabe (1976), "A Complexity Measure":

            v(G) = e - n + p

        with e the number of edges, n the number of nodes and p the number of weakly connected
        components of the graph.

        The formula is used in the form for strongly connected graphs: end nodes are taken to loop
        back to start nodes when the program is executed again.

        Note: v(G) is the size of the basis set of the graph, i.e. the maximum number of linearly
        independent paths in G.
        '''
        edge_count = self.graph.number_of_edges()
        node_count = self.graph.number_of_nodes()
        component_count = nx.number_weakly_connected_components(self.graph)
        return edge_count - node_count + component_count
Exemple #39
0
def all_graphs(num_nodes,directed_motifs):
    """
    Returns a list of single component graphs on the given number of nodes. This function
    is meant primarily as a helper function to get_subgraphs, but can be used for other user purposes.

    Starting from the complete graph, edges are removed one at a time (taken from the end of
    the edge list) and every intermediate graph that still forms a single component is kept;
    the untouched complete graph is appended last. In the directed case the undirected result
    for the same node count is computed recursively, and each of those graphs whose edge list
    is not already present is appended in directed form.

    NOTE(review): only one removal order is followed, so the result is the set of graphs met
    along that sequence rather than an exhaustive enumeration -- confirm this is intended.

    Parameters
    ----------
    num_nodes : The number of nodes in the complete graph

    directed_motifs : A boolean designating whether the motifs should be directed.

    Returns
    ----------
    graphs : A list of the single component graphs collected as described above
    """
    graphs = []
    complete = nx.complete_graph(num_nodes)    # Start with complete graph
    complete_copy = copy.deepcopy(complete)
    if directed_motifs:
        complete = complete.to_directed()
        complete_copy = complete_copy.to_directed()
        complete_ud = all_graphs(num_nodes, False)
        count_components = nx.number_weakly_connected_components
    else:
        count_components = nx.number_connected_components

    # Take a real list of the edges: newer NetworkX returns a live view that
    # has no pop() and would change while edges are being removed.
    edges = list(complete.edges())
    while complete.number_of_edges() > 0:
        # Iteratively remove edges, and capture single component subgraphs
        e = edges.pop()
        complete.remove_edge(e[0], e[1])
        if count_components(complete) == 1:
            graphs.append(copy.deepcopy(complete))
    graphs.append(complete_copy)

    # Add recursively produced undirected graphs to directed set
    if directed_motifs:
        # Both sides are compared as sorted edge tuples so that edge ordering
        # cannot cause a spurious mismatch.
        known_edge_sets = set(tuple(sorted(g.edges())) for g in graphs)
        for undirected in complete_ud:
            if tuple(sorted(undirected.edges())) not in known_edge_sets:
                graphs.append(undirected.to_directed())
    return graphs
def save_network_statistics(g):
    """Compute summary statistics of graph *g* and write them to a text file.

    The statistics are: weakly and strongly connected component counts, node
    and edge counts, density, average clustering coefficient, average degree,
    transitivity and diameter.  A statistic that networkx cannot compute for
    this graph is recorded as ``None``, as is the average degree of a graph
    without nodes.

    One ``name: value`` line per statistic is written to
    ``./network-statistics/twitter-combined-statistics.txt``; the directory
    must already exist.  Nothing is returned.
    """
    stats = {}
    stats['num_weakly_connected_components'] = nx.number_weakly_connected_components(g)
    stats['num_strongly_connected_components'] = nx.number_strongly_connected_components(g)
    stats['num_nodes'] = nx.number_of_nodes(g)
    stats['num_edges'] = nx.number_of_edges(g)
    stats['density'] = nx.density(g)
    try:
        stats['avg_clustering_coef'] = nx.average_clustering(g)
    except Exception:
        # Best effort: the coefficient may be undefined for this graph type.
        stats['avg_clustering_coef'] = None

    # dict() accepts both the degree dict of old NetworkX and the degree
    # view of newer releases.
    degrees = dict(g.degree())
    if stats['num_nodes']:
        stats['avg_degree'] = sum(degrees.values()) / float(stats['num_nodes'])
    else:
        stats['avg_degree'] = None  # no nodes --> average is undefined

    stats['transitivity'] = nx.transitivity(g)
    try:
        stats['diameter'] = nx.diameter(g)
    except Exception:
        # unconnected --> infinite path length between connected components
        stats['diameter'] = None

    # Text mode: the lines written are str, which a binary handle rejects on
    # Python 3.
    with open('./network-statistics/twitter-combined-statistics.txt', 'w') as f:
        for stat_name, stat_value in stats.items():
            f.write(stat_name + ': ' + str(stat_value) + '\n')
def run(filename, gt_file, n_iter):
    """Annotate, prune and condense a graph, then write it out as GraphML.

    This is Python 2 code (print statements, list-returning map()) and uses
    the ``g.node`` / ``g.edge`` mappings of the graph object.

    Steps, in order:
      1. Load the graph with input1() or input2(), chosen by the number of
         whitespace-separated fields on the first line of *filename*.
      2. Read *gt_file* (rows of integers) into lookup tables.
      3. Store ``st_pc`` / ``end_pc`` on every edge.
      4. Remove nodes lying within ``margin`` of either end of their
         chromosome; give the remaining nodes ``count`` and ``read``.
      5. Run *n_iter* passes that remove nodes with in-degree 0.
      6. Call merge_simple_path() five times.
      7. Build a second graph ``h`` with the same nodes and edges and
         annotate its nodes with ``chr``, ``count``, ``read`` and, when the
         mapping JSON can be loaded, ``aln_start`` / ``aln_end`` /
         ``aln_strand``.
      8. Write ``h`` to ``<prefix>_condensed_annotated.graphml`` and ``g`` to
         ``<prefix>_G_condensed_annotated.graphml``, where ``<prefix>`` is
         the text before the first '.' in *filename*, and print the weakly
         and strongly connected component counts of ``h``.

    Parameters:
      filename: path of the graph file.
      gt_file:  path of a text file whose rows hold at least four integers.
      n_iter:   number of in-degree-0 pruning passes.

    Returns None.  It also returns early, without writing any file, when a
    read id of a node cannot be looked up (see the loop over g.nodes() near
    the end).
    """
    
    
    # Peek at the first line: its field count decides which loader is used.
    f=open(filename)
    line1=f.readline()
    print line1
    f.close()
    if len(line1.split()) !=2:
	   g=input1(filename)
    else:
	   g=input2(filename)
    
    # read_to_chr_map: first column of a gt_file row -> second column as str
    # mapping_dict:    row number -> second column
    # pos_dict:        row number -> [min, max] of the third and fourth column
    # NOTE(review): the second column is presumably a chromosome id and the
    # third/fourth an alignment interval -- confirm against the gt_file format.
    read_to_chr_map={} 
    pos_dict = {}
    mapping_dict = {} 

    # Start every id 0..13 at a length of 1000.  An id outside that range in
    # gt_file raises KeyError in the max() update below.
    chr_lengths = {}
    for chr in range(14):
        chr_lengths[chr] = 1000

    with open(gt_file,'r') as f:
        for num, line in enumerate(f.readlines()):
            # Python 2: map() returns a list, which is indexed below.
            m = map(int, line.strip().split())
            # mapping_dict[num] = [min(m), max(m), int(m[0]>m[1])]
            read_to_chr_map[m[0]]= str(m[1])
            mapping_dict[num] = m[1]
            pos_dict[num] = [min(m[2],m[3]),max(m[2],m[3])]    
            # pos_dict[num] = [m[2],m[3],int(m[2]>m[3])]
            # Track the largest coordinate seen per id.
            chr_lengths[m[1]] = max(chr_lengths[m[1]],max(m[2],m[3]))


    print nx.info(g)
    
    print "Chromosome lenghts:"
    print chr_lengths

    # Nodes closer than this to either end of their chromosome are dropped.
    margin = 10000

    del_count = 0


    #print nx.info(g)
    print "Num reads read : "+str(len(read_to_chr_map))

    # NOTE(review): pos_dict and mapping_dict are keyed by gt_file row number
    # but are looked up with the integer prefix of the node name -- this
    # assumes the two coincide; confirm.
    for cur_edge in g.edges():
        node0=int(cur_edge[0].split('_')[0])
        node1=int(cur_edge[1].split('_')[0])
        # g.edge[cur_edge[0]][cur_edge[1]]['st_pc'] = "{0:.2f}".format(1.0*pos_dict[node0][1]/chr_lengths[mapping_dict[node0]])
        # g.edge[cur_edge[0]][cur_edge[1]]['end_pc'] = "{0:.2f}".format(1.0*pos_dict[node1][0]/chr_lengths[mapping_dict[node1]])
        
        # st_pc is the "start percentage"; i.e., the percent location of edge[0] on its original chromosome
        # end_pc is the "end percentage"; i.e., the percent location of edge[1] on its original chromosome

        g.edge[cur_edge[0]][cur_edge[1]]['st_pc'] = 1.0*pos_dict[node0][1]/chr_lengths[mapping_dict[node0]]
        g.edge[cur_edge[0]][cur_edge[1]]['end_pc'] = 1.0*pos_dict[node1][0]/chr_lengths[mapping_dict[node1]]

    
    # Nodes are removed while looping, so this relies on g.nodes() returning
    # a snapshot list rather than a live view.
    for node in g.nodes():
        nodeid=int(node.split('_')[0])

        if pos_dict[nodeid][0] < margin:
            g.remove_node(node)
            del_count += 1
            continue

        if pos_dict[nodeid][1] > chr_lengths[mapping_dict[nodeid]] - margin:
            g.remove_node(node)
            del_count += 1
            continue

        # Surviving nodes start out representing exactly one read: themselves.
        g.node[node]['count'] = 1
        g.node[node]['read'] = node
        #print str(nodeid), node,g.node[node]['chr']

    print "Deleted nodes: "+str(del_count)
        
        
    degree_sequence=sorted(g.degree().values(),reverse=True)
    print Counter(degree_sequence)
    # Each pass removes the nodes that currently have no incoming edge and
    # then prints the graph summary and degree histogram.
    for i in range(n_iter):
        for node in g.nodes():
            if g.in_degree(node) == 0:
                g.remove_node(node)
    
        print nx.info(g)
        degree_sequence=sorted(nx.degree(g).values(),reverse=True)
        print Counter(degree_sequence)
    
    degree_sequence=sorted(nx.degree(g).values(),reverse=True)
    print Counter(degree_sequence)
    
    
    # NOTE(review): 'aval' is not read in this function; presumably it is
    # consumed by merge_simple_path -- confirm.
    g.graph['aval'] = 1000000000
    
    for i in range(5):
        merge_simple_path(g)
        degree_sequence=sorted(nx.degree(g).values(),reverse=True)
        print Counter(degree_sequence)
    
    # h gets the nodes and edges of g without attributes; st_pc / end_pc are
    # copied over explicitly below.
    h=nx.DiGraph()
    h.add_nodes_from(g)
    h.add_edges_from(g.edges())

    for cur_edge in h.edges():
        h.edge[cur_edge[0]][cur_edge[1]]['st_pc'] = g.edge[cur_edge[0]][cur_edge[1]]['st_pc']
        h.edge[cur_edge[0]][cur_edge[1]]['end_pc'] = g.edge[cur_edge[0]][cur_edge[1]]['end_pc']

    # h = g.copy()

    for node in g.nodes():
        # 'read' is a ':'-separated list of node names; the integer prefix of
        # each name is used as the read id.
        reads_in_node=[int(x.split('_')[0]) for x in g.node[node]['read'].split(':')]
        try:
            chr_in_node=map(lambda x: read_to_chr_map[x], reads_in_node)
        except:
            # Lookup failed: print the offending reads and abort the whole
            # run without writing any output file.
            print reads_in_node,g.node[node]['read']
            return
        chr_in_node_set=set(chr_in_node)
        # One value when all reads agree, otherwise all values joined by ':'.
        if len(chr_in_node_set) ==1:
            h.node[node]['chr']=chr_in_node[0]
        else:
            h.node[node]['chr']= ':'.join(chr_in_node)

        h.node[node]['count']=g.node[node]['count']
        try:
            h.node[node]['read']=g.node[node]['read']
        except:
            pass


    # Best effort: any failure in here (ujson missing, mapping file missing
    # or malformed) is swallowed and leaves the nodes handled so far as the
    # only ones carrying alignment annotations.
    try:
        import ujson
        mapping = ujson.load(open(filename.split('.')[0]+'.mapping.json'))
        
        print 'get mapping'
        
        for node in h.nodes():
            #print node
            if mapping.has_key(node):
                h.node[node]['aln_start'] = mapping[node][0]
                h.node[node]['aln_end'] = mapping[node][1]
                h.node[node]['aln_strand'] = mapping[node][2]
            else:
                h.node[node]['aln_start'] = 0
                h.node[node]['aln_end'] = 0
                h.node[node]['aln_strand'] = 0
                
    except:
        pass        


    
    nx.write_graphml(h, filename.split('.')[0]+'_condensed_annotated.graphml')
    nx.write_graphml(g, filename.split('.')[0]+'_G_condensed_annotated.graphml')
    
    print nx.number_weakly_connected_components(h)
    print nx.number_strongly_connected_components(h)
Exemple #42
0
def de_clip(filename, n_nodes, hinge_list,gt_file):
    """Shrink a graph by merging randomly picked simple-path nodes, then save it.

    This is Python 2 code (print statements, dict.has_key) and uses the
    ``g.node`` / ``g.edge`` mappings and ``edges_iter()`` of the graph object.

    Steps, in order:
      1. Load the graph: input3() for a '.graphml' file, otherwise input1()
         or input2() depending on the field count of the first line.
      2. Load the JSON at *gt_file* and annotate every node with
         ``aln_start`` / ``aln_end`` (plus ``chr`` when mapped, ``aln_strand``
         when not), then flag every edge with ``false_positive``.
      3. Repeatedly pick a random node and, when it sits on a simple path
         that passes the checks below, call merge_path() on it, until at
         most *n_nodes* nodes are left (or, with a hinge list, 10000 picks
         have been made).
      4. Write the graph to ``<prefix>.sparse3.graphml``, where ``<prefix>``
         is the text before the first '.' in *filename*, and print the
         weakly and strongly connected component counts.

    Parameters:
      filename:   path of the graph file.
      n_nodes:    target node count at which merging stops.
      hinge_list: path of a hinge file, or None.  Each line needs at least
                  three whitespace-separated fields; the first is combined
                  with '_0' / '_1' into node names and the third ('1' or
                  '-1') decides which of the two is the in-hinge.
      gt_file:    path of a JSON file mapping the part of a node name before
                  the first '_' to a list whose first element is indexed at
                  positions 0, 1 and 2 (used as two alignment coordinates
                  and the ``chr`` value).

    Returns None.
    """

    # NOTE(review): n_iter is never read in this function.
    n_iter = 5

    
    f=open(filename)
    line1=f.readline()
    print line1
    f.close()

    extension = filename.split('.')[-1]

    # Pick the loader from the file extension or the first line's field count.
    if extension == 'graphml':
        g=input3(filename)
    elif len(line1.split()) !=2:
        g=input1(filename)
    else:
        g=input2(filename)

    
    print nx.info(g)
    degree_sequence=sorted(g.degree().values(),reverse=True)
    print Counter(degree_sequence)
    
    degree_sequence=sorted(nx.degree(g).values(),reverse=True)
    print Counter(degree_sequence)
    
    # The handler below only re-raises, so any failure in this section
    # aborts the function.
    try:
        import ujson
        mapping = ujson.load(open(gt_file))
        
        print 'getting mapping'
        mapped_nodes=0
        print str(len(mapping)) 
        print str(len(g.nodes()))
        for node in g.nodes():
            # print node
            node_base=node.split("_")[0]
            # print node_base

            #print node
            if mapping.has_key(node_base):
                # Store the two coordinates in ascending order.
                g.node[node]['aln_start'] = min (mapping[node_base][0][0],mapping[node_base][0][1])
                g.node[node]['aln_end'] = max(mapping[node_base][0][1],mapping[node_base][0][0])
                g.node[node]['chr'] = mapping[node_base][0][2]
                mapped_nodes+=1
            else:
                # pass
                g.node[node]['aln_start'] = 0
                g.node[node]['aln_end'] = 0
                g.node[node]['aln_strand'] = 0


        # false_positive is 0 when the aln_start or the aln_end of the edge's
        # second node lies strictly inside the (aln_start, aln_end) interval
        # of its first node, and 1 otherwise.
        for edge in g.edges_iter():
            in_node=edge[0]
            out_node=edge[1]
            # print  'akjdfakjhfakljh'
            if ((g.node[in_node]['aln_start'] < g.node[out_node]['aln_start'] and  
                g.node[out_node]['aln_start'] < g.node[in_node]['aln_end']) or 
                (g.node[in_node]['aln_start'] < g.node[out_node]['aln_end'] and
                g.node[out_node]['aln_end'] < g.node[in_node]['aln_end'])):
                g.edge[in_node][out_node]['false_positive']=0
            else:
                g.edge[in_node][out_node]['false_positive']=1

    except:
        raise
        # print "json "+filename.split('.')[0]+'.mapping.json'+" not found. exiting."
           
    print hinge_list

    print str(mapped_nodes)+" out of " +str(len(g.nodes()))+" nodes mapped."
    
    # for i in range(5):
    #     merge_simple_path(g)
    #     degree_sequence=sorted(nx.degree(g).values(),reverse=True)
    #     print Counter(degree_sequence)

    in_hinges = set()
    out_hinges = set()
    # Upper bound on random picks in the hinge-list branch.
    num_iter=10000
    iter_done=0
    if hinge_list != None:
        print "Found hinge list."
        with open(hinge_list,'r') as f:
            for lines in f:
                lines1=lines.split()

                # Third field '1': '<id>_0' is the in-hinge and '<id>_1' the
                # out-hinge; '-1' swaps the two.  Other values are ignored.
                if lines1[2] == '1':
                  in_hinges.add(lines1[0]+'_0')
                  out_hinges.add(lines1[0]+'_1')
                elif lines1[2] == '-1':
                  in_hinges.add(lines1[0]+'_1')
                  out_hinges.add(lines1[0]+'_0')

        print str(len(in_hinges))+' hinges found.'

        # Node attribute 'hinge': 100 = in- and out-hinge, 10 = in-hinge only,
        # -10 = out-hinge only, 0 = not a hinge.
        for node in g.nodes():
            if node in in_hinges and node in out_hinges:
                g.node[node]['hinge']=100
            elif node in in_hinges:
                g.node[node]['hinge']=10
            elif node in out_hinges:
                g.node[node]['hinge']=-10
            else:
                g.node[node]['hinge']=0

        while len(g.nodes()) > n_nodes and iter_done < num_iter :
            node = g.nodes()[random.randrange(len(g.nodes()))]
            iter_done+=1
            # print iter_done
            # Only nodes with exactly one predecessor and one successor are
            # merge candidates.
            if g.in_degree(node) == 1 and g.out_degree(node) == 1:

                base_node=node.split("_")[0]
                orintation = node.split("_")[1]
                # if orintation=='1':
                #     node2=base_node+'_0'
                # else:
                #     node2=base_node+'_1'

                # print node,node2

                in_node = g.in_edges(node)[0][0]
                out_node = g.out_edges(node)[0][1]

                # Merge only when none of the three nodes is a hinge, the
                # neighbours have no other branch on this side, the three
                # nodes are distinct, and neither edge is a false positive.
                if g.node[node]['hinge']==0 and g.node[in_node]['hinge']==0  and g.node[out_node]['hinge']==0:
                    if g.out_degree(in_node) == 1 and g.in_degree(out_node) == 1:
                        if in_node != node and out_node != node and in_node != out_node:
                            bad_node=False
                            # print g.in_edges(node)
                            # print g.edge[g.in_edges(node)[0][0]][g.in_edges(node)[0][1]]
                            # print g.out_edges(node)
                            for in_edge in g.in_edges(node):
                                if g.edge[in_edge[0]][in_edge[1]]['false_positive']==1:
                                    bad_node=True
                            for out_edge in g.out_edges(node):
                                if g.edge[out_edge[0]][out_edge[1]]['false_positive']==1:
                                    bad_node=True 
                            if not bad_node:
                                #print in_node, node, out_node
                                merge_path(g,in_node,node,out_node)


                # print g.edge[edge1[0]][edge1[1]]['hinge_edge']

                # Sanity scan after every candidate: report node names that
                # contain no '_'.
                for nd in g.nodes():
                    if len(nd.split("_"))==1:
                        print nd + " in trouble"
                # in_node = g.in_edges(node2)[0][0]
                # out_node = g.out_edges(node2)[0][1]
                # if g.node[node2]['hinge']==0 and g.node[in_node]['hinge']==0  and g.node[out_node]['hinge']==0:
                #     if g.out_degree(in_node) == 1 and g.in_degree(out_node) == 1:
                #         if in_node != node2 and out_node != node2 and in_node != out_node:
                #             bad_node=False
                #             for in_edge in g.in_edges(node2):
                #                 if g.edge[in_edge]==1:
                #                     bad_node=True
                #             for out_edge in g.out_edges(node2):
                #                 if g.edge[out_edge]==1:
                #                     bad_node=True 
                #             if not bad_node:
                #                 #print in_node, node, out_node
                #                 merge_path(g,in_node,node2,out_node)


            # for nd in g.nodes():
            #     print nd

    else:
        # NOTE(review): unlike the branch above this loop has no iteration
        # cap; it only ends once the node count drops to n_nodes, which
        # presumably depends on merge_path removing nodes -- confirm it
        # cannot spin forever when no mergeable node is left.
        while len(g.nodes()) > n_nodes:

            node = g.nodes()[random.randrange(len(g.nodes()))]



            if g.in_degree(node) == 1 and g.out_degree(node) == 1:

                # assert g.in_degree(node2) == 1 and g.out_degree(node2) == 1
                # edge_1 = g.out_edges(node)[0]
                # edge_2 = g.in_edges(node)[0]

                edge1 = g.out_edges(node)[0]
                edge2 = g.in_edges(node)[0]

                # print g.edge[edge1[0]][edge1[1]]['hinge_edge']

                # Both adjacent edges must carry hinge_edge == -1.
                if (g.edge[edge1[0]][edge1[1]]['hinge_edge'] == -1 and g.edge[edge2[0]][edge2[1]]['hinge_edge'] == -1):
                
                    in_node = g.in_edges(node)[0][0]
                    out_node = g.out_edges(node)[0][1]
                    if g.out_degree(in_node) == 1 and g.in_degree(out_node) == 1:
                        if in_node != node and out_node != node and in_node != out_node:
                            #print in_node, node, out_node
                            merge_path(g,in_node,node,out_node)







    
    degree_sequence=sorted(nx.degree(g).values(),reverse=True)
    print Counter(degree_sequence)

    
    nx.write_graphml(g, filename.split('.')[0]+'.sparse3.graphml')
    
    print nx.number_weakly_connected_components(g)
    print nx.number_strongly_connected_components(g)
Exemple #43
0
 def n_components(self):
     """Return the number of weakly connected components of this graph."""
     component_count = networkx.number_weakly_connected_components(self)
     return component_count
                                    if to_node.is_white:
                                        G.add_node(to_node_position, side="WHITE")
                                    elif to_node.is_black:
                                        G.add_node(to_node_position, side="BLACK")
                                except AttributeError:
                                    G.add_node(to_node_position, side="UNDEFINED")
                                if to_node == None:
                                    G.add_edge(from_node_position, to_node_position, weight=0.0)
                                elif i.is_opponent(to_node):
                                    G.add_edge(from_node_position, to_node_position, weight=-1.0)
                                else:
                                    G.add_edge(from_node_position, to_node_position, weight=1.0)                        
            fout.write(move + '\t')                
            fout.write(str(side) + '\t') 
            fout.write(str(nx.number_strongly_connected_components(G)) + '\t')
            fout.write(str(nx.number_weakly_connected_components(G)) + '\t')
            try:
                fout.write(str(nx.average_shortest_path_length(G)) + '\t')
            except:
                fout.write("Not connected" + '\t')
            fout.write(str(side == winner) + '\n')
            '''
            edefensive=[(u,v) for (u,v,d) in G.edges(data=True) if d['weight'] >= 0.0]
            #eoffensive=[(u,v) for (u,v,d) in G.edges(data=True) if d['weight'] <= 0.0]

            pos=nx.circular_layout(G) # positions for all nodes

            # nodes
            if side == 1:
                nx.draw_networkx_nodes(G,pos,nodelist=[node for (node,d) in G.nodes(data=True) if d['side'] == "BLACK"], node_color='blue',node_size=300)
                nx.draw_networkx_nodes(G,pos,nodelist=[node for (node,d) in G.nodes(data=True) if d['side'] == "UNDEFINED"], node_color='blue',node_size=300)
from collections import Counter

# Load the e-mail network as a directed graph with integer node ids.
G = nx.read_edgelist('Email-EuAll.txt', create_using=nx.DiGraph(),
                     nodetype=int)
print('')

# Section "Properti jaringan" (Indonesian: network properties): node and edge
# counts, edges per node and component counts.
print("Properti jaringan")
print("-----------------")
print('')
N, K = G.order(), G.size()
avg_deg = float(K)/N
print('Jumlah node: %d' % N)
print('Jumlah edge: %d' % K)
print('Rata-rata edge: %.3f' % avg_deg)
print('Jumlah strongly connected component: %d' %
      nx.number_strongly_connected_components(G))
print('Jumlah weakly connected component: %d' %
      nx.number_weakly_connected_components(G))
print('')

# Section "Distribusi degree" (Indonesian: degree distribution).
print('Distribusi degree')
print('-----------------')
print('')
# dict() accepts both the degree dict of old NetworkX and the degree view of
# newer releases.  The histograms are counted in one pass each instead of
# calling list.count() once per distinct degree.
# in_values / out_values: distinct degrees in ascending order
# in_hist / out_hist:     number of nodes having the matching degree
in_degrees = dict(G.in_degree())
in_counts = Counter(in_degrees.values())
in_values = sorted(in_counts)
in_hist = [in_counts[x] for x in in_values]
out_degrees = dict(G.out_degree())
out_counts = Counter(out_degrees.values())
out_values = sorted(out_counts)
out_hist = [out_counts[x] for x in out_values]

# "Pembuatan grafik..." (Indonesian: creating the plot).
print('Pembuatan grafik...')
plt.figure()
plt.grid(True)
            #time_vector.sort()

            sorted_time_d = sorted(time_d.items(),key=operator.itemgetter(1))
            name_vector = [ele[0] for ele in sorted_time_d] #only includes rters
            time_vector = [ele[1] for ele in sorted_time_d]

            N_t = len(time_vector)
            N_ut = number_of_unique_nodes - N_t

            #print time_d.values()
            #continue
            #edges = [(srcs[j],rters[j]) for j in xrange(len(srcs))]
            subgraph = nx.DiGraph()
            subgraph.add_edges_from(edges)

            if nx.number_weakly_connected_components(subgraph) == 1:
                #print time_vector
                #print N_t,N_ut
                cnt += 1

                result = my_EM(N_t,N_ut,time_vector)
                named_result = {}
                unknown_result = {}

                sorted_result = sorted(result.items(),key=operator.itemgetter(1),reverse=True)

                #indexed_gnd_truth = sorted(gnd_truth.items()[0],gnd_truth.items()[1])

                #evaluate top k nodes acc, pick 2K candidates
                #K = [1:6]
import json
from util.read_utils import lines_per_n
import community
import networkx as nx

# Directed graph of who wrote to whom: one edge per (sender, recipient) pair,
# styled 'solid' for To recipients and 'dashed' for Cc recipients, labelled
# with the message time.
author_graph = nx.DiGraph()
with open('clean_data.json', 'r') as jfile:
    # NOTE(review): lines_per_n presumably yields the file in 9-line chunks,
    # each holding one JSON record -- confirm against util.read_utils.
    for chunk in lines_per_n(jfile, 9):
        hdr_data = json.loads(chunk)
        for to_addr in str(hdr_data['To']).split(","):
            if '@' in to_addr:
                author_graph.add_edge(str(hdr_data['From']), to_addr.strip(), style='solid', label=hdr_data['Time'])
        for cc_addr in str(hdr_data['Cc']).split(","):
            # Test the Cc address itself; the To-loop variable must not be
            # reused here, or Cc entries are accepted or rejected based on
            # the last To address.
            if '@' in cc_addr:
                author_graph.add_edge(str(hdr_data['From']), cc_addr.strip(), style='dashed', label=hdr_data['Time'])
# The with-block closes the file; no explicit close() is needed.

print("No. of Weakly Connected Components:", nx.number_weakly_connected_components(author_graph))
print("No. of Strongly Connected Components:", nx.number_strongly_connected_components(author_graph))
print("Nodes:", nx.number_of_nodes(author_graph))
print("Edges:", nx.number_of_edges(author_graph))

# The following lines of code generate a dendrogram for the above graph and
# print the partition found at each of its levels.
dendo = community.generate_dendogram(author_graph.to_undirected())
for level in range(len(dendo)) :
    print("Partition at level", level, "is", community.partition_at_level(dendo, level))
    print("-"*10)
def mask_test_edges_directed(adj, test_frac=.1, val_frac=.05, 
    prevent_disconnect=True, verbose=False, false_edge_sampling='iterative'):
    if verbose == True:
        print 'preprocessing...'

    # Remove diagonal elements
    adj = adj - sp.dia_matrix((adj.diagonal()[np.newaxis, :], [0]), shape=adj.shape)
    adj.eliminate_zeros()
    # Check that diag is zero:
    assert np.diag(adj.todense()).sum() == 0

    # Convert to networkx graph to calc num. weakly connected components
    g = nx.from_scipy_sparse_matrix(adj, create_using=nx.DiGraph())
    orig_num_wcc = nx.number_weakly_connected_components(g)

    adj_tuple = sparse_to_tuple(adj) # (coords, values, shape)
    edges = adj_tuple[0] # List of ALL edges (either direction)
    edge_pairs = [(edge[0], edge[1]) for edge in edges] # store edges as list of tuples (from_node, to_node)

    num_test = int(np.floor(edges.shape[0] * test_frac)) # controls how large the test set should be
    num_val = int(np.floor(edges.shape[0] * val_frac)) # controls how alrge the validation set should be
    num_train = len(edge_pairs) - num_test - num_val # num train edges

    all_edge_set = set(edge_pairs)
    train_edges = set(edge_pairs) # init train_edges to have all edges
    test_edges = set() # init test_edges as empty set
    val_edges = set() # init val edges as empty set

    ### ---------- TRUE EDGES ---------- ###
    # Shuffle and iterate over all edges
    np.random.shuffle(edge_pairs)

    # get initial bridge edges
    bridge_edges = set(nx.bridges(nx.to_undirected(g))) 

    if verbose:
        print('creating true edges...')

    for ind, edge in enumerate(edge_pairs):
        node1, node2 = edge[0], edge[1]

        # Recalculate bridges every ____ iterations to relatively recent
        if ind % 10000 == 0:
            bridge_edges = set(nx.bridges(nx.to_undirected(g))) 

        # Don't sample bridge edges to increase likelihood of staying connected
        if (node1, node2) in bridge_edges or (node2, node1) in bridge_edges: 
            continue

        # If removing edge would disconnect the graph, backtrack and move on
        g.remove_edge(node1, node2)
        if prevent_disconnect == True:
            if not nx.is_weakly_connected(g):
                g.add_edge(node1, node2)
                continue

        # Fill test_edges first
        if len(test_edges) < num_test:
            test_edges.add(edge)
            train_edges.remove(edge)
            if len(test_edges) % 10000 == 0 and verbose == True:
                print 'Current num test edges: ', len(test_edges)

        # Then, fill val_edges
        elif len(val_edges) < num_val:
            val_edges.add(edge)
            train_edges.remove(edge)
            if len(val_edges) % 10000 == 0 and verbose == True:
                print 'Current num val edges: ', len(val_edges)

        # Both edge lists full --> break loop
        elif len(test_edges) == num_test and len(val_edges) == num_val:
            break



    # Check that enough test/val edges were found
    if (len(val_edges) < num_val or len(test_edges) < num_test):
        print "WARNING: not enough removable edges to perform full train-test split!"
        print "Num. (test, val) edges requested: (", num_test, ", ", num_val, ")"
        print "Num. (test, val) edges returned: (", len(test_edges), ", ", len(val_edges), ")"

    # Print stats for largest remaining WCC
    print 'Num WCC: ', nx.number_weakly_connected_components(g)
    largest_wcc_set = max(nx.weakly_connected_components(g), key=len)
    largest_wcc = g.subgraph(largest_wcc_set)
    print 'Largest WCC num nodes: ', largest_wcc.number_of_nodes()
    print 'Largest WCC num edges: ', largest_wcc.number_of_edges()

    if prevent_disconnect == True:
        assert nx.number_weakly_connected_components(g) == orig_num_cc

    # Fraction of edges with both endpoints in largest WCC
    def frac_edges_in_wcc(edge_set):
        num_wcc_contained_edges = 0.0
        num_total_edges = 0.0
        for edge in edge_set:
            num_total_edges += 1
            if edge[0] in largest_wcc_set and edge[1] in largest_wcc_set:
                num_wcc_contained_edges += 1
        frac_in_wcc = num_wcc_contained_edges / num_total_edges
        return frac_in_wcc

    # Check what percentage of edges have both endpoints in largest WCC
    print 'Fraction of train edges with both endpoints in L-WCC: ', frac_edges_in_wcc(train_edges)
    print 'Fraction of test edges with both endpoints in L-WCC: ', frac_edges_in_wcc(test_edges)
    print 'Fraction of val edges with both endpoints in L-WCC: ', frac_edges_in_wcc(val_edges)

    # Ignore edges with endpoint not in largest WCC
    print 'Removing edges with either endpoint not in L-WCC from train-test split...'
    train_edges = {edge for edge in train_edges if edge[0] in largest_wcc_set and edge[1] in largest_wcc_set}
    test_edges = {edge for edge in test_edges if edge[0] in largest_wcc_set and edge[1] in largest_wcc_set}
    val_edges = {edge for edge in val_edges if edge[0] in largest_wcc_set and edge[1] in largest_wcc_set}


    ### ---------- FALSE EDGES ---------- ###

    # Initialize empty sets
    train_edges_false = set()
    test_edges_false = set()
    val_edges_false = set()

    # Generate candidate false edges (from g-complement) and iterate through them
    if false_edge_sampling == 'iterative':
        if verbose == True:
            print 'preparing complement adjacency matrix...'

        # Sample false edges from G-complement, instead of randomly generating edges
        # g_complement = nx.complement(g)
        adj_complement = 1 - adj.toarray() # flip 0's, 1's in adjacency matrix
        np.fill_diagonal(adj_complement, val=0) # set diagonals to 0

        # 2 numpy arrays indicating x, y coords in adj_complement
            # WARNING: This line can use up a lot of RAM depending on 'adj' size
        idx1, idx2 = np.where(adj_complement == 1) 
            
        edges_false = np.stack((idx1, idx2), axis=-1) # stack arrays into coord pairs.
        edge_pairs_false = [(edge[0], edge[1]) for false_edge in edges_false]

        # Shuffle and iterate over false edges
        np.random.shuffle(edge_pairs_false)
        if verbose == True:
            print 'adding candidate false edges to false edge sets...'
        for false_edge in edge_pairs_false:
            # Fill train_edges_false first
            if len(train_edges_false) < len(train_edges):
                train_edges_false.add(false_edge)
                if len(train_edges_false) % 100000 == 0 and verbose == True:
                    print 'Current num false train edges: ', len(train_edges_false)

            # Fill test_edges_false next
            elif len(test_edges_false) < len(test_edges):
                test_edges_false.add(false_edge)
                if len(test_edges_false) % 100000 == 0 and verbose == True:
                    print 'Current num false test edges: ', len(test_edges_false)

            # Fill val_edges_false last
            elif len(val_edges_false) < len(val_edges):
                val_edges_false.add(false_edge)
                if len(val_edges_false) % 100000 == 0 and verbose == True:
                    print 'Current num false val edges: ', len(val_edges_false)

            # All sets filled --> break
            elif len(train_edges_false) == len(train_edges) and \
                len(test_edges_false) == len(test_edges) and \
                len(val_edges_false) == len(val_edges):
                break

    # Randomly generate false edges (idx_i, idx_j) 1 at a time to save memory
    elif false_edge_sampling == 'random':
        if verbose == True:
            print 'creating false test edges...'

        # FALSE TEST EDGES
        while len(test_edges_false) < len(test_edges):
            idx_i = np.random.randint(0, adj.shape[0])
            idx_j = np.random.randint(0, adj.shape[0])
            if idx_i == idx_j: # no self-loops
                continue

            # Ensure both endpoints are in largest WCC
            if idx_i not in largest_wcc_set or idx_j not in largest_wcc_set:
                continue

            false_edge = (idx_i, idx_j)

            # Make sure false_edge not an actual edge, and not a repeat
            if false_edge in all_edge_set:
                continue
            if false_edge in test_edges_false:
                continue

            test_edges_false.add(false_edge)

            if len(test_edges_false) % 100000 == 0 and verbose == True:
                print 'Current num false test edges: ', len(test_edges_false)

        # FALSE VAL EDGES
        if verbose == True:
            print 'creating false val edges...'

        while len(val_edges_false) < len(val_edges):
            idx_i = np.random.randint(0, adj.shape[0])
            idx_j = np.random.randint(0, adj.shape[0])
            if idx_i == idx_j:
                continue

            false_edge = (idx_i, idx_j)

            # Make sure false_edge in not an actual edge, not in test_edges_false, not a repeat
            if false_edge in all_edge_set or \
                false_edge in test_edges_false or \
                false_edge in val_edges_false:
                continue
                
            val_edges_false.add(false_edge)

            if len(val_edges_false) % 100000 == 0 and verbose == True:
                print 'Current num false val edges: ', len(val_edges_false)

        # FALSE TRAIN EDGES
        if verbose == True:
            print 'creating false train edges...'

        while len(train_edges_false) < len(train_edges):
            idx_i = np.random.randint(0, adj.shape[0])
            idx_j = np.random.randint(0, adj.shape[0])
            if idx_i == idx_j:
                continue

            false_edge = (idx_i, idx_j)

            # Make sure false_edge in not an actual edge, not in test_edges_false, 
                # not in val_edges_false, not a repeat
            if false_edge in all_edge_set or \
                false_edge in test_edges_false or \
                false_edge in val_edges_false or \
                false_edge in train_edges_false:
                continue

            train_edges_false.add(false_edge)

            if len(train_edges_false) % 100000 == 0 and verbose == True:
                print 'Current num false train edges: ', len(train_edges_false)


    ### ---------- FINAL DISJOINTNESS CHECKS ---------- ###
    if verbose == True:
        print 'final checks for disjointness...'

    # assert: false_edges are actually false (not in all_edge_tuples)
    assert test_edges_false.isdisjoint(all_edge_set)
    assert val_edges_false.isdisjoint(all_edge_set)
    assert train_edges_false.isdisjoint(all_edge_set)

    # assert: test, val, train false edges disjoint
    assert test_edges_false.isdisjoint(val_edges_false)
    assert test_edges_false.isdisjoint(train_edges_false)
    assert val_edges_false.isdisjoint(train_edges_false)

    # assert: test, val, train positive edges disjoint
    assert val_edges.isdisjoint(train_edges)
    assert test_edges.isdisjoint(train_edges)
    assert val_edges.isdisjoint(test_edges)

    if verbose == True:
        print 'creating adj_train...'

    # Re-build adj matrix using remaining graph
    adj_train = nx.adjacency_matrix(g)

    # Convert edge-lists to numpy arrays
    train_edges = np.array([list(edge_tuple) for edge_tuple in train_edges])
    train_edges_false = np.array([list(edge_tuple) for edge_tuple in train_edges_false])
    val_edges = np.array([list(edge_tuple) for edge_tuple in val_edges])
    val_edges_false = np.array([list(edge_tuple) for edge_tuple in val_edges_false])
    test_edges = np.array([list(edge_tuple) for edge_tuple in test_edges])
    test_edges_false = np.array([list(edge_tuple) for edge_tuple in test_edges_false])

    if verbose == True:
        print 'Done with train-test split!'
        print 'Num train edges (true, false): (', train_edges.shape[0], ', ', train_edges_false.shape[0], ')'
        print 'Num test edges (true, false): (', test_edges.shape[0], ', ', test_edges_false.shape[0], ')'
        print 'Num val edges (true, false): (', val_edges.shape[0], ', ', val_edges_false.shape[0], ')'
        print ''

    # Return final edge lists (edges can go either direction!)
    return adj_train, train_edges, train_edges_false, \
        val_edges, val_edges_false, test_edges, test_edges_false
            #time_vector.sort()

            sorted_time_d = sorted(time_d.items(),key=operator.itemgetter(1))
            name_vector = [ele[0] for ele in sorted_time_d] #only includes rters with exact time
            time_vector = [ele[1] for ele in sorted_time_d]
            N_t = len(time_vector)
            N_ut = number_of_unique_nodes - N_t

            #cascade_cent_array = centrality_look_up(ind_d,cent_df,unique_nodes.keys())

            #print time_d.values()
            #continue
            #edges = [(srcs[j],rters[j]) for j in xrange(len(srcs))]
            subgraph = nx.DiGraph()
            subgraph.add_edges_from(edges)
            number_of_weakly_cc = nx.number_weakly_connected_components(subgraph)

            if nx.number_weakly_connected_components(subgraph) <= 10: #need to handle nodes without gnd_truth
                #print time_vector
                #print N_t,N_ut
                cnt += 1

                #result = my_EM(N_t,N_ut,time_vector)
                #result = centrality_look_up(ind_d,cent_df,name_vector)[0]

                unique_nodes_list = unique_nodes.keys()
                cent_inds = centrality_look_up(ind_d,cent_df,unique_nodes_list)[0]
                cent_names = [unique_nodes_list[ind] for ind in cent_inds]


def de_clip(filename, n_iter):

    g = nx.MultiDiGraph()
    
    # count = 0

    with open(filename,'r') as f:
        for line in f.xreadlines():
            l = line.strip().split()
            #print l2
            g.add_edge(l[0],l[1],overlap=int(l[2])/2)
            # if count < 10:
            #     print l[0], l[1], l[2]
            #     count += 1

            node0start = int(l[7][1:])
            node0end = int(l[8][:-1])

            g.node[l[0]]['length'] = node0end - node0start

            node1start = int(l[9][1:])
            node1end = int(l[10][:-1])

            g.node[l[1]]['length'] = node1end - node1start

    
    print nx.info(g)

    try:
        import ujson
        mapping = ujson.load(open(filename.split('.')[0]+'.mapping.json'))
        
        # print mapping

        print 'get mapping'
        
        for node in g.nodes():
            #print node
            if mapping.has_key(node):

                # alnstart = int(mapping[node][0])
                # alnend = int(mapping[node][1])

                # g.node[node]['length'] = abs(alnend-alnstart)
                # print abs(alnend-alnstart)

                g.node[node]['aln_strand'] = mapping[node][3]

                # g.node[node]['aln_start'] = mapping[node][0]
                # g.node[node]['aln_end'] = mapping[node][1]
                # g.node[node]['aln_strand'] = mapping[node][2]
            else:
                # g.node[node]['length'] = 5000
                g.node[node]['aln_strand'] = 5
                # print "this happened"
                # g.node[node]['aln_start'] = 0
                # g.node[node]['aln_end'] = 0
                # g.node[node]['aln_strand'] = 0
                
    except:
        pass 



    degree_sequence=sorted(g.degree().values(),reverse=True)
    print Counter(degree_sequence)
    for i in range(n_iter):
        for node in g.nodes():
            if g.degree(node) < 2:
                g.remove_node(node)

        print nx.info(g)
        degree_sequence=sorted(nx.degree(g).values(),reverse=True)
        print Counter(degree_sequence)

    degree_sequence=sorted(nx.degree(g).values(),reverse=True)
    print Counter(degree_sequence)
    
    
    g.graph['aval'] = 1000000000
    
    for i in range(5):
        merge_simple_path(g)
        degree_sequence=sorted(nx.degree(g).values(),reverse=True)
        print Counter(degree_sequence)
    
       
    
    nx.write_graphml(g, filename.split('.')[0]+'.graphml')
    
    print nx.number_weakly_connected_components(g)
    print nx.number_strongly_connected_components(g)


    # Next we create the gfa file


    outputfile = filename.split('.')[0]+'.gfa'
    with open(outputfile, 'w') as fout:

        for cur_node in g.nodes():

            node_length = g.node[cur_node]['length']
            node_str = 'A'*node_length
            node_str = node_str + '\n'

            fout.write("NODE "+str(cur_node)+' 0 0 0 0 0\n')
            fout.write(node_str)
            fout.write(node_str)
            # print "NODE "+str(node)

        for arc in g.edges():
            fout.write("ARC "+str(arc[0])+' '+str(arc[1])+' 0\n')



    # Compute N50

    contig_lengths = []

    for cur_node in g.nodes():
        contig_lengths.append(g.node[cur_node]['length'])

    print "N50 = "+str(comp_n50(contig_lengths))
import matplotlib.pyplot as plt
from math import log

##################################
######### READ EDGE LIST #########
##################################

print('Reading edgelist')

# Read combined edge-list
twitter_edges_dir = './twitter/twitter_combined.txt'

# Parse edgelist into directed graph; the with-block guarantees the edge file
# is closed once parsing finishes (it was previously left open).
with open(twitter_edges_dir) as edges_f:
    twitter_g = nx.read_edgelist(edges_f, nodetype=int, create_using=nx.DiGraph())
print('Num. weakly connected components: ', nx.number_weakly_connected_components(twitter_g))

# print('Saving adjacency matrix')

# Get adjacency matrix
adj = nx.adjacency_matrix(twitter_g)

# Save adjacency matrix
with open('./twitter/twitter-combined-adj.pkl', 'wb') as f:
    pickle.dump(adj, f)



##################################
##### VISUALIZATIONS, STATS ######
##################################
Exemple #52
0
        if len(g)>=size_of_component:
            uu+=1
    if uu>=number_of_components:
        break
    else:
        counte+=1
        continue

print str(" ")
print G.name
print str(" ")

G.remove_nodes_from(nx.isolates(G))

print 'Number of strongly connected components:', nx.number_strongly_connected_components(G)
print 'Number of weakly connected components:', nx.number_weakly_connected_components(G)
print str(" ")

print 'Number of unilaterally connected components:', len(ucc)
print str(" ")

print 'Unilaterally connected components (UCC):'
for i in range(len(ucc)):
    print 'UCC', str(i+1)+':', ucc[i]
print str(" ")

print 'Edges in unilaterally connected components:'
for i in range(len(ucce)):
    print 'Edges in UCC', str(i+1)+':', ucce[i]
print str(" ")
def run(filename, gt_file, n_iter):
    
    
    f=open(filename)
    line1=f.readline()
    print line1
    f.close()
    if len(line1.split()) !=2:
	   g=input1(filename)
    else:
	   g=input2(filename)
    
    print str(len(g.nodes())) + " vertices in graph to begin with."

    connected_components=[x for x in nx.weakly_connected_components(g)]
    for component in connected_components:
        if len(component) < 10:
            g.remove_nodes_from(component)

    print str(len(g.nodes())) + " vertices in graph after removing components of at most "+str(LENGTH_THRESHOLD)+ " nodes."

    read_to_chr_map={}

    if gt_file.split('.')[-1]=='json':
        with open(gt_file,'r') as f:
            tmp_map=json.load(f)
        for read in tmp_map:
            readid=int(read.strip("'"))
            read_to_chr_map[readid] = int(tmp_map[read][0][2])
    else:
        with open(gt_file,'r') as f:
            for num, line in enumerate(f.readlines()):
                m = map(int, line.strip().split())
                read_to_chr_map[m[0]]=m[1]   
    
    nodes_seen=set([x.split("_")[0] for x in g.nodes()])

    for node in nodes_seen:
        read_to_chr_map.setdefault(int(node),-1)

    #print nx.info(g)
    print "Num reads read : "+str(len(read_to_chr_map))
    
    for node in g.nodes():
        nodeid=int(node.split('_')[0])

        g.node[node]['count'] = 1
        g.node[node]['read'] = node
        #print str(nodeid), node,g.node[node]['chr']
        
        
    degree_sequence=sorted(g.degree().values(),reverse=True)
    print Counter(degree_sequence)
    for i in range(n_iter):
        for node in g.nodes():
            if g.in_degree(node) == 0:
                g.remove_node(node)
    
        print nx.info(g)
        degree_sequence=sorted(nx.degree(g).values(),reverse=True)
        print Counter(degree_sequence)
    
    degree_sequence=sorted(nx.degree(g).values(),reverse=True)
    print Counter(degree_sequence)
    
    
    g.graph['aval'] = 1000000000
    
    for i in range(5):
        merge_simple_path(g)
        degree_sequence=sorted(nx.degree(g).values(),reverse=True)
        print Counter(degree_sequence)
    
    h=nx.DiGraph()
    h.add_nodes_from(g)
    h.add_edges_from(g.edges())
    for node in g.nodes():
        reads_in_node=[int(x.split('_')[0]) for x in g.node[node]['read'].split(':')]
        try:
            chr_in_node=map(lambda x: read_to_chr_map[x], reads_in_node)
        except:
            print reads_in_node,g.node[node]['read']
            return
        chr_in_node_set=set(chr_in_node)
        if len(chr_in_node_set) ==1:
            h.node[node]['chr']=chr_in_node[0]
        else:
            h.node[node]['chr']=':'.join(map(str,chr_in_node))

        h.node[node]['count']=g.node[node]['count']
        try:
            h.node[node]['read']=g.node[node]['read']
        except:
            pass

    
    nx.write_graphml(h, filename.split('.')[0]+'_condensed_annotated.graphml')
    
    print nx.number_weakly_connected_components(h)
    print nx.number_strongly_connected_components(h)
# Add one edge per "src;dst" line, but only when both endpoints are already
# nodes carrying a 'sender' attribute; all other lines are skipped.
with open("graph_edges.csv", "r") as edge_file:
    for pair in edge_file:
        if not pair.strip():
            # Tolerate blank lines (e.g. a trailing newline) instead of
            # failing with IndexError on the missing second field.
            continue
        edge = pair.split(';')
        edge[1] = edge[1].strip()
        try:
            # The lookups raise KeyError for unknown nodes or nodes without
            # a 'sender' attribute, which skips the edge.
            discussion_graph.node[edge[0]]['sender']
            discussion_graph.node[edge[1]]['sender']
            discussion_graph.add_edge(*edge)
        except KeyError:
            pass
# The with-statement closes the file; no explicit close() is needed.
print("Edges added.")

# Summary statistics of the discussion graph
print("No. of Nodes: ", nx.number_of_nodes(discussion_graph))
print("No. of Edges: ", nx.number_of_edges(discussion_graph))
print("No. of Weakly Connected Components: ", nx.number_weakly_connected_components(discussion_graph))

# Uncomment the lines below to save the graph as a GEXF file
# nx.write_gexf(discussion_graph, "gexf/master_disc_graph.gexf")
# print("GEXF file generated.")

# Uncomment the lines below to read the graph from a GEXF file
# discussion_graph = nx.read_gexf("gexf/master_disc_graph.gexf", node_type=int)
# print("Graph loaded from GEXF file.")

# Export every weakly connected component as its own GEXF file, named after
# the smallest node id (converted to int) in that component.
for conn_subgraph in nx.weakly_connected_component_subgraphs(discussion_graph):
    # NOTE(review): sender_color_map is not used in the visible part of this loop
    sender_color_map = {}
    node_list = [int(x) for x in conn_subgraph.nodes()]
    # Comment the respective lines below to only save in the required formats
    nx.write_gexf(conn_subgraph, 'gexf/' + str(min(node_list))+'.gexf')
Exemple #55
0
      listofgraphs.append((G, int(number)))
    number = line.strip().split()[1]
    edgelist = []
  else:
    edgelist.append(line.rstrip())
f.close()

# Parse the last pending edge list and queue it with its repetition count
G = nx.read_edgelist(edgelist, create_using=nx.DiGraph())
listofgraphs.append((G, int(number)))

# Build F as the disjoint union of `number` copies of every queued graph
F = None
for (G, number) in listofgraphs:
  for i in range(number):
    if F is None:
      F = G.copy()
    else:
      F = nx.disjoint_union(F,G)

# Keep adding random edges until everything is one weakly connected component
while nx.number_weakly_connected_components(F) > 1:
  addRandomEdge(F)

# Relabel nodes as consecutive integers starting at 1
F = nx.convert_node_labels_to_integers(F,1)

# Output: node count on the first line, then the bare edge list. The file is
# opened in binary mode, so the header is encoded explicitly (a no-op on
# Python 2, required on Python 3), and the with-block closes it even on error.
with open(sys.argv[1] + '.OUT', 'wb') as f2:
  f2.write((str(len(F.nodes())) + '\n').encode('ascii'))
  nx.write_edgelist(F, f2, data=False)
#os.system("./Kavosh -i output.txt -r 1000 -s 3")

#nx.draw(F)
#plt.show()
def run(filename, gt_file, n_iter):
    
    
    f=open(filename)
    line1=f.readline()
    print line1
    f.close()
    if len(line1.split()) !=2:
	   g=input1(filename)
    else:
	   g=input2(filename)
    
    mapping_dict = {}

    with open(gt_file,'r') as f:
        for num, line in enumerate(f.readlines()):
            m = map(int, line.strip().split())
            # mapping_dict[num] = [min(m), max(m), int(m[0]>m[1])]
            mapping_dict[num] = m[1]    
    
    print nx.info(g)
    
    
    for node in g.nodes():
        nodeid=int(node.split('_')[0])

        g.node[node]['count'] = 1
        g.node[node]['chr']=mapping_dict[nodeid]
        g.node[node]['read'] = node
        #print str(nodeid), node,g.node[node]['chr']
        
        
    degree_sequence=sorted(g.degree().values(),reverse=True)
    print Counter(degree_sequence)
    for i in range(n_iter):
        for node in g.nodes():
            if g.in_degree(node) == 0:
                g.remove_node(node)
    
        print nx.info(g)
        degree_sequence=sorted(nx.degree(g).values(),reverse=True)
        print Counter(degree_sequence)
    
    degree_sequence=sorted(nx.degree(g).values(),reverse=True)
    print Counter(degree_sequence)
    
    
    g.graph['aval'] = 1000000000
    
    for i in range(5):
        merge_simple_path(g)
        degree_sequence=sorted(nx.degree(g).values(),reverse=True)
        print Counter(degree_sequence)
    
    h=nx.DiGraph()
    h.add_nodes_from(g)
    h.add_edges_from(g.edges())
    for node in g.nodes():
        h.node[node]['count']=g.node[node]['count']
        h.node[node]['chr']=g.node[node]['chr']
        try:
            h.node[node]['read']=g.node[node]['read']
        except:
            pass


    try:
        import ujson
        mapping = ujson.load(open(filename.split('.')[0]+'.mapping.json'))
        
        print 'get mapping'
        
        for node in h.nodes():
            #print node
            if mapping.has_key(node):
                h.node[node]['aln_start'] = mapping[node][0]
                h.node[node]['aln_end'] = mapping[node][1]
                h.node[node]['aln_strand'] = mapping[node][2]
            else:
                h.node[node]['aln_start'] = 0
                h.node[node]['aln_end'] = 0
                h.node[node]['aln_strand'] = 0
                
    except:
        pass        


    
    nx.write_graphml(h, filename.split('.')[0]+'_condensed.graphml')
    
    print nx.number_weakly_connected_components(h)
    print nx.number_strongly_connected_components(h)
 def test_number_weakly_connected_components(self):
     """Weak component count of each digraph equals the component count of its undirected form."""
     for digraph, _expected in self.gc:
         undirected = digraph.to_undirected()
         weak_count = nx.number_weakly_connected_components(digraph)
         assert_equal(weak_count, nx.number_connected_components(undirected))
pos=nx.spring_layout(G,k=0.15,iterations=10)
# pos=nx.graphviz_layout(G)
# pos=layout(G)

G.remove_nodes_from(nx.isolates(G))

print str(" ")
print 'WEAK CONNECTEDNESS OF DIRECTED GRAPHS'
print str(" ")

print str(" ")
print G.name
print str(" ")

print 'Is graph G weakly connected?', nx.is_weakly_connected(G)
print 'The number of weakly connected components of G is:', nx.number_weakly_connected_components(G)
print str(" ")

lc=sorted(nx.weakly_connected_components(G), key = len, reverse=True)
print 'List of weakly connected components:'
# print sorted(nx.weakly_connected_components(G), key = len, reverse=True)
print lc
print str(" ")


deg=G.degree()
deg_dic=[]
for nd in deg:
    if deg[nd]>0:
        deg_dic.append(nd)
node0 = random.choice(deg_dic)
Exemple #59
0
def compute_quota(G, gg, date, windowsize, topic, all_uid_pr, network_type):
    prekey = _utf8_unicode(topic)+'_'+str(date)+'_'+str(windowsize)
    #print 'prekey:', prekey.encode('utf-8')
    #print 'G_nodes:',len(G.nodes())
    #print 'gg_nodes:', len(gg.nodes())
    #无向图的最大连通子图
    

    G_edges = G.edges()
    print 'G_edges:',len(G_edges)
    '''
    nodes_list = G.nodes()
    l = len(nodes_list)
    print 'l:', l
    r = random.randint(0,l-1)
    print 'r:', r
    bfs_edges = list(nx.bfs_edges(gg,nodes_list[r]))
    print 'bfs_edges:', bfs_edges
    print 'len(bfs_edges):', len(bfs_edges)
    '''
    degree = G.degree()
    print 'degree_counter'
    degree_test = get_counter(degree)

    indegree = G.in_degree()
    #print 'indegree:', indegree
    indegree_histogram = get_counter(indegree)
    save_quota(prekey+'_indegree_histogram_'+str(network_type), json.dumps(indegree_histogram))
    outdegree = G.out_degree()
    #print 'outdegree:', outdegree
    outdegree_histogram = get_counter(outdegree)
    save_quota(prekey+'_outdegree_histogram_'+str(network_type), json.dumps(outdegree_histogram))
    
    HH = nx.connected_component_subgraphs(gg)
    maxhn = 0
    for h in HH:
        if maxhn < len(h.nodes()):
            maxhn = len(h.nodes())
            H = h
    #print 'H_nodes:', len(H.nodes())
    
    #ndegree = G.degree()
    # 节点度,dict{nodes:value}
    #get_key_user('node_degree', topic, date, windowsize, ndegree)
    #根据节点度排序,获取节点度层面的关键用户
    
    dCentrality = nx.degree_centrality(G)
    # 度中心性 dict{nodes:value} 度量重要性
    
    avedc = get_ave(dCentrality)
    #平均度中心性 float
    save_quota(prekey+'_ave_degree_centrality_'+str(network_type), avedc)
    
    maxwcc = nx.weakly_connected_component_subgraphs(G).next()
    #print 'maxwcc_G:', len(maxwcc)
    
    bCentrality = nx.betweenness_centrality(G)
    # 介数中心 dict{nodes:value},度量其对网络流程的重要性
    
    avebc = get_ave(bCentrality)
    # 平均介数中心性 float
    save_quota(prekey+'_ave_betweenness_centrality_'+str(network_type), avebc)
    
    cCentrality = nx.closeness_centrality(G)
    # 紧密中心性 dict{nodes:value},度量感知整个网络流程事件的位置
    avecc = get_ave(cCentrality)
    # 平均紧密中心性 float
    save_quota(prekey+'_ave_closeness_centrality_'+str(network_type), avecc)
    
    # get_key_user module
    print 'get_user'
    get_key_user(topic, date, windowsize, dCentrality, bCentrality, cCentrality, network_type)
    ''' 
    eCentrality = nx.eigenvector_centrality_numpy(G)
    # 特征向量中心性
    #get_key_user('eigenvector_centrality', topic, date, windowsize, eCentrality)
    # 获取特征向量中心性层面的关键用户
    aveec = get_ave(eCentrality)
    # 平均特征向量中心性 float
    save_quota(prekey+'_eigenvector_centrality_'+str(network_type), aveec)
    '''
    spl_histogram = get_spl_histogram(H)
    save_quota(prekey + '_shortest_path_length_histogram_'+str(network_type), json.dumps(spl_histogram))
       
    avespl = nx.average_shortest_path_length(H) # !!!!
    # 平均最短路径长度 float--only for connected gragh
    save_quota(prekey+'_average_shortest_path_length_'+str(network_type), avespl)
   
    dhistogram = nx.degree_histogram(G)
    # 节点度分布(从一到最大度的出现频次)
    save_quota(prekey+'_degree_histogram_'+str(network_type), dhistogram)


    '''
    #Hdhistogram = nx.degree_histogram(G) # !!!!
    # histogram of H-----max connected graph
    #save_quota(prekey + '_H_degree_histogram', Hdhistogram)
    '''
    gamma = get_powerlaw(dhistogram, prekey)
    # 幂律分布系数
    save_quota(prekey+'_power_law_distribution_'+str(network_type), gamma)
    
    
    nnodes = len(G.nodes())
    # the number of nodes in G
    save_quota(prekey+'_number_nodes_'+str(network_type), nnodes)
    
    Hnnodes = len(H.nodes())
    # the number o nodes in H
    ratio_H2G = float(Hnnodes) / float(nnodes)
    print '!!!!!ratio_H2G!!!!!:',ratio_H2G
    #save_quota(prekey + '_ratio_H2G', ratio_H2G)
    
    alldegree = sum(dhistogram)
    ave_degree = float(alldegree) / float(nnodes)
    # ave_degree 平均节点度
    save_quota(prekey+'_ave_degree_'+str(network_type), ave_degree)

    
    nedges = len(G.edges())
    # the number of edged in G
    save_quota(prekey+'_number_edges_'+str(network_type), nedges)
    
    gdiameter = nx.diameter(H) # !!!
    # The diameter is the maximum eccentricity   int-n
    save_quota(prekey+'_diameter_'+str(network_type), gdiameter)

    geccentricity = nx.eccentricity(H) # !!!
    # the eccentricity of nodes in gg
    avegec = get_ave(geccentricity)
    save_quota(prekey+'_ave_eccentricity_'+str(network_type), avegec)

    
    sconnectedn = nx.number_strongly_connected_components(G)
    # 强连通子图数量  int-n
    save_quota(prekey+'_number_strongly_connected_components_'+str(network_type), sconnectedn)
    #maxscc = nx.strongly_connected_component_subgraphs(G).next()
    #print 'maxwcc:', len(maxwcc)
    wconnectesn = nx.number_weakly_connected_components(G)
    # 弱连通子图数量 int-n
    
    save_quota(prekey+'_number_weakly_connected_components_'+str(network_type), wconnectesn)
    maxwcc = nx.weakly_connected_component_subgraphs(G).next()
    print 'maxwcc_G:', len(maxwcc.nodes())
    print '!!!!ratio_maxwcc_G!!!:', float(len(maxwcc.nodes()))/float(nnodes)
    
    aveclustering = nx.average_clustering(gg) # !!!!
    # 平均聚类系数
    save_quota(prekey+'_average_clustering_'+str(network_type), aveclustering)

    dassortativity_coefficient = nx.degree_assortativity_coefficient(G)
    # 同配性系数
    save_quota(prekey + '_degree_assortativity_coefficient_'+str(network_type), dassortativity_coefficient)
    
    #print 'G_edges:', len(G.edges())
    #print 'G_edges:', len(G.selfloop_edges())
    #GG = G
    #GG.remove_edges_from(GG.selfloop_edges())
    #print 'test_edges:',len(GG.edges())
    kcore = nx.core_number(G)
    #print 'kcore:', kcore
    # k_score k核数
    #avekc = get_ave(kcore)
    
    maxkc = get_max(kcore)
    save_quota(prekey + '_max_k_core_'+str(network_type), maxkc)