Example #1
def save_mat(conn_matrix, est_path, fmt='npy'):
    """
    Save a connectivity matrix/graph to disk in a variety of formats.

    Parameters
    ----------
    conn_matrix : array
        Adjacency matrix stored as an m x n array of nodes and edges.
    est_path : str
        File path to .npy file containing graph with thresholding applied.
    fmt : str
        Format to save connectivity matrix/graph (e.g. .npy, .pkl, .graphml, .txt, .ssv, .csv). Default is .npy.
    """
    import networkx as nx
    import numpy as np
    G = nx.from_numpy_array(conn_matrix)
    G.graph['ecount'] = nx.number_of_edges(G)
    G = nx.convert_node_labels_to_integers(G, first_label=1)
    if fmt == 'edgelist_csv':
        nx.write_weighted_edgelist(G, "%s%s" % (est_path.split('.npy')[0], '.csv'), encoding='utf-8')
    elif fmt == 'gpickle':
        nx.write_gpickle(G, "%s%s" % (est_path.split('.npy')[0], '.pkl'))
    elif fmt == 'graphml':
        nx.write_graphml(G, "%s%s" % (est_path.split('.npy')[0], '.graphml'))
    elif fmt == 'txt':
        np.savetxt("%s%s" % (est_path.split('.npy')[0], '.txt'), nx.to_numpy_array(G))
    elif fmt == 'npy':
        np.save(est_path, nx.to_numpy_array(G))
    elif fmt == 'edgelist_ssv':
        nx.write_weighted_edgelist(G, "%s%s" % (est_path.split('.npy')[0], '.ssv'), delimiter=" ", encoding='utf-8')
    else:
        raise ValueError('\nERROR: File format not supported!')

    return
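A file written this way can be loaded back with the matching networkx reader. A minimal round-trip sketch (the .npy path and the tiny matrix are illustrative, not from the original project):

import networkx as nx
import numpy as np

# Hypothetical round trip: save a 2-node graph as an edge list (note the
# 'edgelist_csv' branch above keeps the default space delimiter despite
# the .csv extension), then reload it.
conn_matrix = np.array([[0.0, 0.5], [0.5, 0.0]])
save_mat(conn_matrix, 'graph.npy', fmt='edgelist_csv')   # writes graph.csv

G = nx.read_weighted_edgelist('graph.csv')
print(G.edges(data=True))   # -> [('1', '2', {'weight': 0.5})]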
Example #2
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('edgelist',
                        nargs='?',
                        default='analysis/combined_716eef6.prob')
    parser.add_argument('outfile', nargs='?')
    parser.add_argument('-t', '--interconnectivity', type=float, default=0.80)
    parser.add_argument('-d', '--density', type=float, default=0.80)
    parser.add_argument('-m', '--min-edge', type=float, default=0.20)
    args = parser.parse_args()
    if args.outfile is None:
        args.outfile = args.edgelist.replace('.prob', '') + '.analysis.tmp'

    threshold_min_weight = args.min_edge
    threshold_interconnectivity = args.interconnectivity
    threshold_density = args.density

    print_err("Loading graph")
    G_sim = nx.read_weighted_edgelist(enforce_min(
        skip_comments(open(args.edgelist, 'rb')), threshold_min_weight),
                                      nodetype=int,
                                      delimiter=',')
    print_err('Loaded (V={:}, E={:})'.format(len(G_sim), G_sim.size()))

    cc = analyse(G_sim, threshold_interconnectivity)

    nx.write_weighted_edgelist(G_sim.subgraph(cc), args.outfile)
Example #3
def create_tag_tag_graph_weighted():
    t_t = nx.Graph()

    query = QuestionPosts.select(QuestionPosts.Tags) \
        .where(QuestionPosts.AcceptedAnswerId.is_null(False),
               QuestionPosts.ViewCount.is_null(False), QuestionPosts.OwnerUserId.is_null(False))
    tag_results = list(query.dicts())
    tag_to_int = {}
    running_max = 0
    for result in tag_results:
        tags = [x for x in re.split('<|>', result['Tags']) if x]
        for t in tags:
            if t not in tag_to_int:
                tag_to_int[t] = running_max
                running_max += 1

    for result in tag_results:
        tags = [x for x in re.split('<|>', result['Tags']) if x]
        for i in range(len(tags) - 1):
            tt_edge = (tag_to_int[tags[i]], tag_to_int[tags[i + 1]])
            u, v = tt_edge
            if t_t.has_edge(u, v):
                t_t[u][v]['weight'] += 1
            else:
                t_t.add_edge(u, v, weight=1)

    nx.write_weighted_edgelist(t_t, 'Data/T_T_Weighted.edgelist')

    with open(os.path.join(DATA_DIR, 'Tag_To_Node_Weighted'), 'w') as f:
        for k, v in tag_to_int.items():
            f.write(k + ',' + str(v) + "\n")
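A hedged sketch of reading the two files back (assuming DATA_DIR is the same 'Data' directory used for the edge list):

import networkx as nx

# Reload the weighted tag-tag graph written above.
t_t = nx.read_weighted_edgelist('Data/T_T_Weighted.edgelist', nodetype=int)

# Rebuild the tag -> integer map from the companion file.
tag_to_int = {}
with open('Data/Tag_To_Node_Weighted') as f:
    for line in f:
        tag, node_id = line.rstrip('\n').rsplit(',', 1)
        tag_to_int[tag] = int(node_id)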
Example #4
def save_graph(
        ego, G
):  # Receives the id of the current ego and the graph (edge list)
    with open(output_dir + str(ego) + ".edge_list", 'wb') as graph:
        nx.write_weighted_edgelist(G,
                                   graph)  # Write the edge list WITH weights
    G.clear()
Example #5
def syllable_net(corpus, syllable_list, d="directed", w="weighted"):
    """Creates syllable network from co-occurrence of syllables
    within words.

    Parameters
    ----------
    corpus : file
        original text file from which the network
        will be created
    syllable_list : file
        file whose lines contain a word and its syllabification
        (syllables separated by "-")
    d : directed or undirected
        type of graph
    w : weighted or unweighted
        if weighted is selected then the weight of the link
        between two syllables is proportional to the overall
        frequency of the corresponding syllables' co-occurrence
        within words from the text
    """
    with open(corpus, "r", encoding="utf-8") as f:
        f_r = f.readlines()

    words = [line.split("\t") for line in f_r]

    with open(syllable_list, "r", encoding="utf-8") as f:
        f_r = f.readlines()

    syllables = [line.split() for line in f_r]

    if d == "directed":
        g = nx.DiGraph()
    elif d == "undirected":
        g = nx.Graph()

    syllable_edges = dict()

    for i in words:
        if len(i) > 1:
            for j in syllables:
                if i[1] == j[0]:
                    for l, r in zip(j[1].split("-")[:-1], j[1].split("-")[1:]):
                        edge = (l, r)
                        if edge in syllable_edges:
                            syllable_edges[edge] += 1
                        else:
                            syllable_edges[edge] = 1
                else:
                    g.add_node(i[1])

    edge_list = [(k[0], k[1], v) for (k, v) in syllable_edges.items()]

    if w == "unweighted":
        g.add_edges_from(edge_list)
        nx.write_edgelist(g, corpus.rsplit(".", 1)[0] + "_syllable.edges")
    elif w == "weighted":
        g.add_weighted_edges_from(edge_list)
        nx.write_weighted_edgelist(
            g,
            corpus.rsplit(".", 1)[0] + "_syllable.edges")

    return g
Example #6
def main(n_start, n_count=1, n_inc=1, c_in_start=10, c_in_count=1, c_in_inc=1, c_out_start=5, c_out_count=1, c_out_inc=1, comm_count = 2, DC=False, i=0):
    bp_uncertain = 'src/bp'

    edge_frac = 1.
    nonedge_mult = 5.
    b = 2
    trials = 2

    os.makedirs('out', exist_ok=True)
    os.makedirs('data', exist_ok=True)

    for n in custom_range(n_start, n_count, n_inc):
        for c_in in custom_range(c_in_start, c_in_count, c_in_inc):
            for c_out in custom_range(c_out_start, c_out_count, c_out_inc):
                original_net = 'data/original_net-%d-%f-%f-%f-%f-%f-%d.edges'%(n,c_in,c_out,b,edge_frac,nonedge_mult, i)
                uncertain_net = 'data/noisy_net-%d-%f-%f-%f-%f-%f-%d.edges'%(n,c_in,c_out,b,edge_frac,nonedge_mult, i)
                uncertain_comms = 'out/uncertain_comms-%d-%f-%f-%f-%f-%f-%d.out'%(n,c_in,c_out,b,edge_frac,nonedge_mult, i)
 
                print("making and fuzzing network")
                G_orig = make_net(c_in, c_out, n)
                write_edgelist(G_orig, original_net)
                G, _ = fuzz_network(G_orig, 1, b, edge_frac, nonedge_mult)
                write_weighted_edgelist(G, uncertain_net)
 
                start1 = time()
                print("running belief propagation")
                os.system('%s -i %s -o %s -c %d -l %d -n %d' % (bp_uncertain, uncertain_net, uncertain_comms, comm_count, 3, trials))
                end1 = time()

                with open('out/results.txt', 'a+') as out_file:
                    out_file.write("%d %f %f\t%f %f %f\t %f %f\t %s %d\n" %(n,
                                    c_in, c_out,
                                    b,edge_frac,nonedge_mult,
                                    evaluate(uncertain_comms, n), end1-start1,
                                    str(datetime.now()), i))
Example #7
def graph_generator(n, m):
    """
    n is the number of nodes and m is the number of edges connected to each new node
    """
    if m < 1 or m >= n:
        print("Network must have m >= 1 and m < n (got m=%d, n=%d)" % (m, n))
        return None

    G = nx.barabasi_albert_graph(n, m, seed=24)
    G = G.to_directed()

    for (u, v, w) in G.edges(data=True):
        w['weight'] = random.randint(1, 20)

    nx.draw(G, with_labels=True)  # (with_edges is not a valid nx.draw argument)
    plt.axis('off')
    plt.show()
    nx.write_weighted_edgelist(G, 'test.weighted.edgelist')

    with open('test.weighted.edgelist', 'r+') as f:
        content = f.read()
        f.seek(0, 0)
        f.write('A' + '\n' + str(G.order()) + '\n' + content)
    import os
    os.system('cls')
    print(
        "A graph with nodes = ", n, " and edges = ", G.number_of_edges(),
        " has been saved to the file test.weighted.edgelist as an arc list"
    )
    return G
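Since graph_generator prepends two header lines ('A' and the node count) to the edge list, reading the file back requires skipping them first; a minimal sketch:

import networkx as nx

with open('test.weighted.edgelist', 'rb') as f:
    f.readline()              # skip the 'A' marker line
    n = int(f.readline())     # node count written by graph_generator
    G = nx.read_weighted_edgelist(f, nodetype=int, create_using=nx.DiGraph)
print(n, G.number_of_edges())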
Example #8
def step():
    global time, o, g, T, perturbation_period, pert_accu, g_ut
    
    time +=1
    
    if pert_accu == perturbation_period:
        if T > 600 and T < 3000:
            learning()
        pert_accu = 0
        T += 1
        T_list.append( T )
        U_plot.append( global_uo(o) )
        randomize_states(o)
    else:
        pert_accu += 1
    
    i = rd.choice(list(o.nodes()))


    node_state(i)


    if time == 3599998:
    #if time == 3598:
        nx.write_weighted_edgelist(g, 'g_edgelist_end.csv')
        nx.write_weighted_edgelist(o, 'o_edgelist_end.csv') 
Example #9
def combine_data_get_sp_paths_costs_activated(G_unweighted, SI,
                                              response_nw_fname, paths):
    SI_relevant = mic_fun.get_relevant_SI(SI)

    # Map gene expression values onto unweighted network
    G_response = net_fun.get_activated_response_network(
        SI_relevant, G_unweighted)
    print("Got response network with ", len(G_response.nodes()), " nodes and ",
          len(G_response.edges()), " edges")

    # Drop SI values for genes which don't map to response network
    genes_to_drop = set(SI.index) - set(G_response.nodes())
    SI = SI.drop(genes_to_drop)

    if len(paths) == 0:  # Actual data
        # Write the response network to file
        nx.write_weighted_edgelist(G_response,
                                   response_nw_fname,
                                   delimiter='\t')
        # Get all-pairs-shortest-path costs
        # Return value is a pandas dataframe, indexed by the string src#dest
        Pij = net_fun.get_all_sp_paths_costs(G_response)
        print("Got shortest path costs for ", Pij.shape[0], " node-pairs")
    elif len(paths) > 0:  # Randomized data
        # Get cost of the same paths that are the shortest in the actual dataset
        Pij = net_fun.get_costs_of_given_paths(G_response, paths)
        print("Got cost of paths which are shortest in the actual data")

    return Pij, SI
Example #10
File: graph.py Project: j1c/m2g
    def save_graph(self, graphname, fmt='edgelist'):
        """
        Saves the graph to disk

        **Positional Arguments:**

                graphname:
                    - Filename for the graph

        **Optional Arguments:**

                fmt:
                    - Output graph format
        """
        self.g.graph['ecount'] = nx.number_of_edges(self.g)
        g = nx.convert_node_labels_to_integers(self.g, first_label=1)
        if fmt == 'edgelist':
            nx.write_weighted_edgelist(g, graphname, encoding='utf-8')
        elif fmt == 'gpickle':
            nx.write_gpickle(g, graphname)
        elif fmt == 'graphml':
            nx.write_graphml(g, graphname)
        else:
            raise ValueError(
                'edgelist, gpickle, and graphml currently supported')
Example #11
def write_export(output_directory, export_ref_annotated_format, span, graph):
    if not os.path.exists(output_directory):
        os.mkdir(output_directory)
    if export_ref_annotated_format == "gexf":
        log("write gexf export", span)
        networkx.write_gexf(
            graph, os.path.join(output_directory, "%s_annotated.gexf" % span))
    elif export_ref_annotated_format == "edgelist":
        log("write csv export", span)
        networkx.write_weighted_edgelist(graph,
                                         os.path.join(
                                             output_directory,
                                             "%s_annotated.csv" % span),
                                         delimiter="\t")
    elif export_ref_annotated_format == "pajek":
        log("write pajek export", span)
        networkx.write_pajek(
            graph, os.path.join(output_directory, "%s_annotated.net" % span))
    elif export_ref_annotated_format == "graphml":
        log("write pajek export", span)
        networkx.write_graphml(
            graph, os.path.join(output_directory,
                                "%s_annotated.graphml" % span))
    else:
        log("no compatible export format specified", span)
Example #12
def cleanseWeightedEdgeData(csvfile, cleansedcsvfile, ctyarray):
    """
    There are errors in data pertaining to UN country nodes. These nodes may not exist either due to spelling or they refer to regional entities.
    Such nodes are detected and removed
    :param edgefile:
    :param ctyarray: A list of UN Countries
    :return:
    """
    cleansedfilewithspace = 'temp' + str(random.random()) + '.edgelist'
    graph = nx.read_weighted_edgelist(csvfile,
                                      delimiter=',',
                                      create_using=nx.DiGraph())
    for n in list(graph.nodes()):  # copy the node list; nodes are removed below
        try:
            ctylongform = ctyarray[n]
        except Exception:
            print("This node doesn't exist in country list...dropping node", n)
            graph.remove_node(n)
    nx.write_weighted_edgelist(graph, cleansedfilewithspace)

    edgelist = []
    with open(cleansedfilewithspace, 'r') as csvfile:
        countryreader = csv.reader(csvfile, delimiter=' ')
        for row in countryreader:
            src = row[0]
            tgt = row[1]
            weight = row[2]
            edgelist.append([src, tgt, weight])

    with open(cleansedcsvfile, 'w', newline='') as csvfile:
        edgewriter = csv.writer(csvfile, delimiter=',')
        for item in edgelist:
            edgewriter.writerow([item[0], item[1], item[2]])
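Worth noting: nx.write_weighted_edgelist accepts a delimiter argument, so the temporary space-delimited file and the manual rewrite above can usually be avoided. A hedged sketch of the same cleansing written directly to CSV (same inputs assumed):

import networkx as nx

def cleanse_weighted_edge_data(csvfile, cleansedcsvfile, ctyarray):
    graph = nx.read_weighted_edgelist(csvfile, delimiter=',',
                                      create_using=nx.DiGraph())
    # Iterate over a copy of the nodes, since nodes are removed as we go.
    for n in list(graph.nodes()):
        if n not in ctyarray:
            print("This node doesn't exist in country list...dropping node", n)
            graph.remove_node(n)
    nx.write_weighted_edgelist(graph, cleansedcsvfile, delimiter=',')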
Example #13
def write_output(sdp_results, outfname, subgraphfile):
    S, obj, obj_rounded = sdp_results

    print "Returning subgraph with OQC score", obj_rounded, "(%s)" % obj
    n = len(S)
    if 'weight' in S.edges_iter(data=True).next()[2]:
        e = sum(data['weight'] for u, v, data in S.edges_iter(data=True))
    else:
        e = S.number_of_edges()
    header = "|S|,|E|,density,diameter,triangle density,OQC,obj\n"
    with open(outfname, 'w') as f:
        f.write(header)
        if n > 0:
            f.write(str(n) + ',')
            f.write(str(S.number_of_edges()) + ',')
            if n > 1:
                f.write(str(2. * e / (n * (n - 1))) + ',')
            else:
                f.write(str(0) + ',')
            if nx.is_connected(S):
                f.write(str(nx.diameter(S)) + ',')
            else:
                f.write('inf,')
            if n > 2:
                f.write(str(2. * sum(nx.triangles(S).values()) / (n * (n - 1) * (n - 2))) + ',')
            else:
                f.write(str(0) + ',')
            f.write(str(obj_rounded) + ',')
            f.write(str(obj) + '\n')
        else:
            f.write("0,0,0,0,0,0,0,")
            f.write("%s\n" % obj)
    nx.write_weighted_edgelist(S, subgraphfile)
Example #14
def export(graph, span):
    if CONFIG["export_ref_format"] == "gexf":
        print("Writing .gexf export")
        networkx.write_gexf(graph,
                            os.path.join(CONFIG["parsed_data"], span,
                                         "%s.gexf" % span),
                            encoding="UTF-8")
    elif CONFIG["export_ref_format"] == "edgelist":
        print("Writing .csv export")
        networkx.write_weighted_edgelist(graph,
                                         os.path.join(CONFIG["parsed_data"],
                                                      span, "%s.csv" % span),
                                         delimiter="\t")
    elif CONFIG["export_ref_format"] == "pajek":
        print("Writing .pajek export")
        networkx.write_pajek(graph,
                             os.path.join(CONFIG["parsed_data"], span,
                                          "%s.net" % span),
                             encoding='UTF-8')
    elif CONFIG["export_ref_format"] == "json":
        print("Writing .json export")
        data = json_graph.node_link_data(graph)
        json.dump(data,
                  open(
                      os.path.join(CONFIG["parsed_data"], span,
                                   "%s.json" % span), "w"))
    else:
        print("No export compatible with the specified export format!")
Example #15
 def MCLAlgorithm(self, inflation=3.3):
     """
     Metoda wykonuje grupowanie za pomocą algorytmu MCL
     
     @param inflation: wartość współczynnika inflacji algorytmu MCL
     @requires: program MCL w ścieżce wykonywalnej 
     @rtype: list
     @return: lista list z członkami grup
     """
     
     try:
         nx.write_weighted_edgelist(self.graph, "/tmp/mcl-input", delimiter="\t")
      except Exception:
         nx.write_edgelist(self.graph, "/tmp/mcl-input", delimiter="\t")
     import os
     logger.debug("Invoking mcl command ...")
     os.system("mcl /tmp/mcl-input --abc -te 2 -I %f -o /tmp/mcl-output" % inflation)
     logger.debug("MCL clustering done")
     
      with open("/tmp/mcl-output", 'r') as out_file:
          lines = out_file.readlines()
     
     partition = list()
     
      for line in lines:
          partition.append([int(x) for x in line.split()])
     
     return partition
Example #16
def init_minimal():
    global g, o, file_num
    g = nx.complete_graph(args.nodes)

    for n in g.nodes():
        g.nodes[n]['s'] = 1

    for i,j in g.edges():
        g.edges[i, j]['weight'] = 0

    o = g.copy()

    #for i,j in o.edges():
        #o.edge[i][j]['weight'] = rd.choice([1,-1])

    for i,j in o.edges():
        if rd.random() < 0.07:
            o.edges[i, j]['weight'] = rd.choice([1, -1])

    # o.edges[0, 1]['weight'] = 1
    # o.edges[0, 2]['weight'] = 1
    # o.edges[1, 2]['weight'] = 1

    
    nx.write_weighted_edgelist(g, 'run_%s_g_edgelist_%d.csv' % (args.runid, file_num))
    nx.write_weighted_edgelist(o, 'run_%s_o_edgelist_%d.csv' % (args.runid, file_num))
Example #17
def saveWeightedGraphAsCSV(graph, tocsvfile):
    """
    Save a NetworkX weighted graph object into an edge file (CSV).
    NetworkX saves it as a space-delimited file, which cannot be properly processed by Excel for further manipulation.
    Thus we convert it into a proper CSV ourselves.
    :param graph:
    :param tocsvfile:
    :return:
    """
    cleansedfilewithspace = 'temp' + str(random.random()) + '.edgelist'
    print "Size of this subgraph: ", len(
        graph.nodes()), " First node is: ", graph.nodes()[0]
    nx.write_weighted_edgelist(graph, path=cleansedfilewithspace)
    edgelist = []
    with open(cleansedfilewithspace, 'r') as csvfile:
        countryreader = csv.reader(csvfile, delimiter=' ')
        for row in countryreader:
            src = row[0]
            tgt = row[1]
            weight = row[2]
            edgelist.append([src, tgt, weight])

    with open(tocsvfile, 'w', newline='') as csvfile:
        edgewriter = csv.writer(csvfile, delimiter=',')
        for item in edgelist:
            edgewriter.writerow([item[0], item[1], item[2]])
Example #18
File: graph.py Project: gkiar/ndmg
    def save_graph(self, graphname, fmt='edgelist'):
        """
        Saves the graph to disk

        **Positional Arguments:**

                graphname:
                    - Filename for the graph

        **Optional Arguments:**

                fmt:
                    - Output graph format
        """
        self.g.graph['ecount'] = nx.number_of_edges(self.g)
        g = nx.convert_node_labels_to_integers(self.g, first_label=1)
        if fmt == 'edgelist':
            nx.write_weighted_edgelist(g, graphname, encoding='utf-8')
        elif fmt == 'gpickle':
            nx.write_gpickle(g, graphname)
        elif fmt == 'graphml':
            nx.write_graphml(g, graphname)
        else:
            raise ValueError('edgelist, gpickle, and graphml currently supported')
Example #20
def clean(target, verbose=False, **kwargs):
    """Cleans the target dataset by making the graph undirected and connected"""
    if target == "all":
        for t in GRAPHS.keys():
            clean(t)
        return
    if target not in GRAPHS.keys():
        raise ValueError("Unknown target.")
    print("Cleaning dataset {}...".format(target))
    target_dir = os.path.join(GRAPH_DIR, target)
    edgelist_filename = os.path.join(target_dir, GRAPHS[target]["edgelist"])
    basename, ext = os.path.splitext(edgelist_filename)
    weighted_edgelist_filename = "{}_weighted{}".format(basename, ext)
    if target == "PPI":
        with open(edgelist_filename, 'r') as f:
            G = nx.readwrite.json_graph.node_link_graph(json.load(f))
        M = sorted([G.subgraph(c).copy() for c in nx.connected_components(G) if len(c) > 100], key=len)
        f = pd.DataFrame(np.load(os.path.join(target_dir, "ppi/ppi/ppi-feats.npy")))
        c = pd.read_json(os.path.join(target_dir, "ppi/ppi/ppi-class_map.json")).T
        for i, a in enumerate(M):
            nx.write_edgelist(a, os.path.join(target_dir, "ppi_{:02}.edgelist".format(i+1)))
            nodes = [n for n in a.nodes()]
            fi = f.loc[nodes, :]
            ci = c.loc[nodes, :]
            ci.to_json(os.path.join(target_dir, "ppi_{:02d}.classes".format(i+1)))
            fi.to_json(os.path.join(target_dir, "ppi_{:02d}.features".format(i+1)))
    else:
        G = nx.read_edgelist(edgelist_filename, nodetype=int)
        G = G.subgraph(max(nx.connected_components(G), key=len)).copy()
        nx.write_edgelist(G, edgelist_filename)
        for _, _, d in G.edges(data=True):
            if "weight" not in d:
                d["weight"] = 1
        nx.write_weighted_edgelist(G, weighted_edgelist_filename)
Example #21
def generate_failure_network2():
    g = nx.read_weighted_edgelist('edgelist/ninux/0', nodetype=int)
    index = 0
    for i in range(10):
        random.seed(1234)
        bc = nx.betweenness_centrality(g)
        nodes = [(k, v) for k, v in bc.items()]
        nodes.sort(key=lambda x: x[1], reverse=True)
        to_rem = 0
        i = 0
        for node in nodes:
            g1 = g.copy()
            to_rem = node[0]
            g1.remove_node(node[0])
            if nx.is_connected(g1):
                print(i)
                break
            i += 1
        g.remove_node(to_rem)
        #print(nx.number_connected_components(g))
        for j in range(4):
            nx.write_weighted_edgelist(g, 'edgelist/testdata2/' + str(index))
            nx.write_weighted_edgelist(
                g, 'edgelist/testdata2/' + str(40 * 2 - 1 - index))
            index += 1
Example #22
def weight_preserving_configuration_model(G, filename=' '):
    import random as rn
    import time
    weight_dictionary = nx.get_edge_attributes(G, 'weight')
    weight_sequence = list(weight_dictionary.values())
    degree_sequence = [d for _, d in nx.degree(G)]

    rn.seed(rn.randint(0, 1000000) + time.time())
    E = nx.configuration_model(degree_sequence)
    E = nx.Graph(E)
    E.remove_edges_from(nx.selfloop_edges(E))
    weight_sequence_temp = weight_sequence
    for t in range(100):
        rn.shuffle(weight_sequence_temp)

    for e in E.edges():
        E.edges[e[0], e[1]]['weight'] = weight_sequence_temp[0]
        weight_sequence_temp = weight_sequence_temp[1:]

    if filename != ' ':
        nx.write_weighted_edgelist(E,
                                   filename,
                                   delimiter=' ',
                                   encoding='utf-8')
        print('Randomized edgelist dumped to ' + filename)

    return E
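A quick usage sketch for the randomizer above (the input graph and output filename are illustrative):

import networkx as nx

G = nx.karate_club_graph()
nx.set_edge_attributes(G, 1.0, 'weight')   # give every edge a weight
E = weight_preserving_configuration_model(G, 'randomized.edges')
print(E.number_of_edges())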
Example #23
def save_graph(G,
               output_path,
               delimiter=',',
               write_stats=True,
               write_weights=False,
               write_dir=True):
    r"""
    Saves a graph to a file as an edgelist or weighted edgelist. If the write_stats parameter is set to True the file
    will include several lines containing the same basic graph statistics as provided by the get_stats function.
    For undirected graphs, the method can store both directions of every edge (see write_dir).

    Parameters
    ----------
    G : graph
       A NetworkX graph
    output_path : file or string
       File or filename to write. If a file is provided, it must be
       opened in 'wb' mode.
    delimiter : string, optional
       The string used to separate values. Default is ','.
    write_stats : bool, optional
        Sets if graph statistics should be added to the edgelist or not. Default is True.
    write_weights : bool, optional
        If True data will be stored as weighted edgelist (e.g. triplets src, dst, weight) otherwise as normal edgelist.
        If the graph edges have no weight attribute and this parameter is set to True,
        a weight of 1 will be assigned to each edge. Default is False.
    write_dir : bool, optional
        This option is only relevant for undirected graphs. If False, the graph will be stored with a single
        direction of the edges. If True, both directions of edges will be stored. Default is True.
    """
    # Write the graph stats in the file if required
    if write_stats:
        get_stats(G, output_path)

    # Open the file where data should be stored
    f = open(output_path, 'a+b')

    # Write the graph to a file and use both edge directions if graph is undirected
    if G.is_directed():
        # Store edgelist
        if write_weights:
            J = nx.DiGraph()
            J.add_weighted_edges_from(G.edges.data('weight', 1))
            nx.write_weighted_edgelist(J, f, delimiter=delimiter)
        else:
            nx.write_edgelist(G, f, delimiter=delimiter, data=False)
    else:
        if write_dir:
            H = nx.to_directed(G)
            J = nx.DiGraph()
        else:
            H = G
            J = nx.DiGraph()
        # Store edgelist
        if write_weights:
            J.add_weighted_edges_from(H.edges.data('weight', 1))
            nx.write_weighted_edgelist(J, f, delimiter=delimiter)
        else:
            nx.write_edgelist(H, f, delimiter=delimiter, data=False)

    f.close()
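A usage sketch for the saver above (get_stats comes from the same module; here stats are skipped so the output is a plain weighted edge list):

import networkx as nx

G = nx.Graph()
G.add_edge('a', 'b', weight=2.5)
G.add_edge('b', 'c')   # no weight: stored as weight 1 when write_weights=True
save_graph(G, 'toy.edgelist', write_stats=False, write_weights=True)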
Example #24
def main(_):
    dataset_path = Path(FLAGS.dataset_path)
    item_name = FLAGS.amazon_reviews.split("5")[0][:-1] if FLAGS.item == "amazon" else FLAGS.item
    if not FLAGS.text_embeddings:
        text_file_path = dataset_path / FLAGS.file

        if FLAGS.item == "keen":
            data = load_jsonl(text_file_path)
            all_keens = keen.get_keens(data)

            # keeps keens with at least one gem
            keens = {k: v for k, v in all_keens.items() if v.gems}
            print(f"Total amount of keens: {len(all_keens)}")
            print(f"Keen with at least one gem: {len(keens)}")

            if FLAGS.item == "keen":
                texts = keen.build_texts_from_keens(keens)
            else:
                texts = keen.build_texts_from_gems(keens)
        elif FLAGS.item == "ml-1m":
            texts = movielens.build_texts_from_movies(text_file_path)
        elif FLAGS.item == "amazon":
            texts = amazon.build_text_from_items(dataset_path, FLAGS.amazon_reviews, FLAGS.amazon_meta)
        else:
            raise ValueError(f"Unrecognized item: {FLAGS.item}")

        print(f"Items with text from {item_name} to encode with USE: {len(texts)}")
        print(list(texts.items())[:3])

        weight_first_embed = FLAGS.item == "keen" or "amazon" in FLAGS.dataset_path
        embeds = build_item_embeds(texts, FLAGS.use_model_url, weight_first_embedding=weight_first_embed)
        export_text_embeddings(embeds, dataset_path, item_name)
    else:
        embeds = load_text_embeddings(FLAGS.text_embeddings)

    if FLAGS.debug:
        embeds = {k: v for k, v in list(embeds.items())[:50]}

    if len(embeds) < FLAGS.max_embedding_len:
        item_ids, cossim_matrix = build_cossim_matrix(embeds)
        graph = build_graph(item_ids, cossim_matrix, FLAGS.threshold, FLAGS.use_distance)
    else:
        # if there are N embeddings, the cossim_matrix is N^2, which might not fit in memory for large values of N.
        # In this case, we compute the cosine similarity for each pair of embeddings, but this is deadly slow
        graph = build_graph_from_embeds(embeds, FLAGS.threshold, FLAGS.use_distance)

    print(f"Graph info:\n{nx.info(graph)}")
    if FLAGS.plot:
        plot_graph(graph, dataset_path / f'{FLAGS.item}_{FLAGS.item}_graph_th{FLAGS.threshold}.png')

    neighs_and_dists = get_neighbors_with_distances(graph)
    result = {"item_item_distances": neighs_and_dists}

    # stores graph
    graph_file_name = f'{item_name}_th{FLAGS.threshold}_graph.edgelist'
    nx.write_weighted_edgelist(graph, str(dataset_path / graph_file_name))
    # stores distances for preprocessing
    file_name = f'{item_name}_th{FLAGS.threshold}_{"cos" if FLAGS.use_distance else "hop"}distances.pickle'
    save_as_pickle(dataset_path / file_name, result)
Example #25
def print_influence_zone(G, node_wts, fc, noi, downstream, upstream):
	nodes = set(downstream).union(set(upstream))
	inf_zone = G.subgraph(nodes)
	nx.write_weighted_edgelist(inf_zone, noi+'_inf_zone.txt')
	with open(noi+'_inf_zone_nodes.txt', 'w') as f:
		f.write("\t".join(["node", "weight", "fold_change"]) + "\n")
		for node in nodes:
			f.write("\t".join([str(node), str(node_wts[node]), str(fc[node])]) + "\n")
Example #26
def data():
    global time, o, g, file_num
    nx.write_weighted_edgelist(g, 'run_%s_g_edgelist_end_%d.csv' % (args.runid, file_num))
    nx.write_weighted_edgelist(o, 'run_%s_o_edgelist_end_%d.csv' % (args.runid, file_num))
    GU = open('run_%s_gu_%d.txt' % (args.runid, file_num), 'w')
    gu = global_uo(o)
    GU.write(str(gu))
    GU.close()
Example #27
def save_graph(graph, name):
    if graph is None or not USE_GRAPH_FILE:
        return

    path = get_graph_path(name)

    if not os.path.exists(path):
        nx.write_weighted_edgelist(graph, path, encoding="utf-8")
Example #28
 def save(self):
     """
     Saves itself to a file. The data structure could get quite large, caching to disk is a good idea
     
     ** note ** replace with Redis in production -- Redis dependency is removed for Open Source release to decrease complexity
     """
     l.info("<<<<<<< SAVING WORD-GRAPH >>>>>>>")
     net.write_weighted_edgelist(self.word_graph, "wordgraph_edgelist.txt")
Example #29
def buildGraph(cosin_similarities, data_set_name, EdgeLists_folder):
    G = nx.from_numpy_array(cosin_similarities)
    filtered = [(u, v, d) for (u, v, d) in G.edges(data=True)
                if u < v and d['weight'] > 0.95]
    G_filter = nx.Graph(filtered)
    PATH = os.path.join(EdgeLists_folder, data_set_name)
    nx.write_weighted_edgelist(G_filter, PATH + ".file")
    return G_filter
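One plausible way the cosin_similarities input gets produced (an assumption, not from the original project; scikit-learn's cosine_similarity is a common source of such a matrix):

import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Hypothetical feature vectors: 20 items, 50 features each.
vectors = np.random.rand(20, 50)
cosin_similarities = cosine_similarity(vectors)   # 20 x 20 symmetric matrix

G_filter = buildGraph(cosin_similarities, 'demo_set', 'edgelists')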
Example #32
def findTEdges(G2005, G2006):
    G = nx.Graph()
    for e in G2006.edges():
        if not G2005.has_edge(
                *e):  #e does not exist in G2005 but exists in G2006
            #edges.append(e)
            G.add_edge(*e)

    # Note: these edges carry no 'weight' attribute, so no weight column is written.
    with open('T.edgelist', 'wb+') as fp:
        nx.write_weighted_edgelist(G, fp, delimiter='*')
Example #33
def write_edgelist(path_to_file):
    graphe = nx.read_gexf(path_to_file + ".gexf")
    for source, target in list(graphe.edges()):
        if source == target:
            graphe.remove_edge(source, target)
        else:
            graphe[source][target].clear()
            graphe[source][target]['weight'] = 1

    nx.write_weighted_edgelist(nx.convert_node_labels_to_integers(graphe), path_to_file + ".txt")
Example #34
def target_edges(G1, G2):
    target_graph = nx.Graph()
    #loop over edges in graph2
    for edges in G2.edges():
        #if it is not included in graph1 then add it to target graph
        if not G1.has_edge(*edges):
            target_graph.add_edge(*edges)
    with open("target_graph.txt", "wb+") as fp:
        nx.write_weighted_edgelist(target_graph, fp, delimiter=',')
    print(len(target_graph.edges))
Example #35
def main():
    for data_set_name in [
            'airport', 'authors', 'collaboration', 'facebook', 'congress',
            'forum'
    ]:
        graph = networkx.read_weighted_edgelist('../graph/' + data_set_name +
                                                '.tsv')
        graph = networkx.convert_node_labels_to_integers(graph)
        networkx.write_weighted_edgelist(graph,
                                         '../reindexed_graphs/' +
                                         data_set_name + '.tsv',
                                         delimiter='\t')
Example #36
    def write_networks(self):
        """
        Writes all networks in a Nets file to weighted edge list files.

        :return:
        """
        try:
            for network in self.networks:
                path = self.inputs['fp'] + '/' + network + '.txt'
                nx.write_weighted_edgelist(G=self.networks[network], path=path)
        except Exception:
            logger.error("Unable to write networks to disk. ", exc_info=True)
Example #37
 def save_graph(self, OUT_PATH):
     """
     Save the graph in OUT_PATH
     """
     if self.graphtool:
         for i, cost_class in enumerate(self.cost_classes):
             self.graph.edge_properties[cost_class] = self.cost_props[i]
         self.graph.edge_properties["weight"] = self.weight
         self.graph.save(OUT_PATH + ".xml.gz")
     else:
         nx.write_weighted_edgelist(self.graph,
                                    OUT_PATH + '.weighted.edgelist')
Example #38
def update_wrapper(infoname, priorname, outname, outavgname):
    G = update(infoname, priorname)
    for e in G.edges():
        print(G[e[0]][e[1]]['params'])
        print(e, stats.gamma(G[e[0]][e[1]]['params'][0], scale=G[e[0]][e[1]]['params'][1]).stats(moments='m'))
    nx.write_edgelist(G,outname)
    A = G.copy()
    for e in A.edges():
        p = stats.gamma(G[e[0]][e[1]]['params'][0], scale=G[e[0]][e[1]]['params'][1]).stats(moments='m')
#       if p == nan: A[e[0]][e[1]]['weight'] = 0
        A[e[0]][e[1]]['weight'] = p
    nx.write_weighted_edgelist(A,outavgname,delimiter=',')
Example #39
def original_generate_token_graph():
    corp = []
    sentences = []      # Initialize an empty list of sentences
    
    input_folders = [ sub_dir for sub_dir in listdir(dataset_folder) if isdir(join(dataset_folder, sub_dir)) ]
    
    for folder in input_folders:
        dir_path = dataset_folder + os.sep + folder + os.sep
        files = [ f for f in listdir(dir_path) if isfile(join(dir_path,f)) ]
        
        for file in files:
            file_path = dir_path + file
            file_name, file_extension = splitext(file_path)
            doc = ""
            
            if file_extension == ".pdf":
                doc = convert_pdf_to_txt(file_path)
            elif file_extension == ".docx":
                doc = convert_docx_to_txt(file_path)
            else:
                continue
                
            if doc != "":
                doc = doc.decode("utf8")
                #doc = words_to_phrases(doc)
                doc = doc.lower()
                doc = doc_to_wordlist(doc,True)
                corp = it.chain(corp,doc)
                #sentences += doc_to_sentences(doc, tokenizer, remove_stopwords=False)
    
    corp = list(corp)
    graph = nx.Graph()
    weights = Counter()
    edges = set()
    window = corp[0:5]
    
    for tup in it.permutations(window,2):
        weights[tup] += 1
    for i in range(3,len(corp)-2):
        for j in range(i-2,i+2):
            weights[(corp[j],corp[i+2])] += 1
            weights[(corp[i+2],corp[j])] += 1
            edges.add((corp[i+2],corp[j]))
            
    for e in edges:
        graph.add_edge(e[0], e[1], weight=weights[e])
    
    print(graph)
    nx.write_weighted_edgelist(graph, "graph.g")
    print(nx.to_numpy_array(graph))
    np.savetxt("graph.adj", nx.to_numpy_array(graph))
    print("finished")
Example #40
def ego_word_subnet(word_network,
                    word,
                    radius=1,
                    d="directed",
                    w="weighted",
                    neighborhood="all"):
    """Creates word-ego network which is a subnetwork of
    neighbours centered at one specified node (word)
    within a given radius.

    Parameters
    ----------
    word_network : edge list of original network
    word : string
        subnetwork will be created of neighbours
        centered at specified word
    radius : int
        radius from which subnetwork will be created
    d : directed or undirected
        type of graph
    w : weighted or unweighted
        if weighted is selected then the weight of the link
        between two words is proportional to the overall
        frequency of the corresponding words' co-occurrence
        within the original network
    neighborhood : successors, predecessors or all
    """
    if d == "directed":
        word_net = nx.read_weighted_edgelist(word_network,
                                             create_using=nx.DiGraph())
        if neighborhood == "successors":
            sg = nx.ego_graph(word_net, word, radius)
        elif neighborhood == "predecessors":
            sg = nx.ego_graph(word_net.reverse(), word, radius)
        elif neighborhood == "all":
            sg = nx.ego_graph(word_net, word, radius, undirected=True)

    elif d == "undirected":
        word_net = nx.read_weighted_edgelist(word_network)
        sg = nx.ego_graph(word_net, word, radius)

    if w == "unweighted":
        nx.write_edgelist(
            sg,
            word_network.rsplit(".", 1)[0] + "_ego_subnetwork.edges")
    elif w == "weighted":
        nx.write_weighted_edgelist(
            sg,
            word_network.rsplit(".", 1)[0] + "_ego_subnetwork.edges")

    return sg
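A minimal usage sketch (file name and word are illustrative):

# Two-hop ego network around the word "network", following only successors.
sg = ego_word_subnet('corpus_coocurrence.edges', 'network',
                     radius=2, d="directed", w="weighted",
                     neighborhood="successors")
print(sg.number_of_nodes(), sg.number_of_edges())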
Example #42
def write_edgelist(path_to_file):
    graphe = nx.read_gexf(path_to_file + ".gexf")
    if type(graphe) == nx.MultiDiGraph:
        print("has_multiple_edges")
        graphe = nx.DiGraph(graphe)
    if type(graphe) == nx.MultiGraph:
        print("has_multiple_edges")
        graphe = nx.Graph(graphe)

    for source, target in graphe.edges():
        graphe[source][target].clear()
        graphe[source][target]["weight"] = 1

    nx.write_weighted_edgelist(nx.convert_node_labels_to_integers(graphe), path_to_file + ".txt")
Example #43
def KCored(G):
	# Set k value
	k_values = []
	# k = 0.0
	nodes = G.nodes()
	for node in nodes:
		k_values.append(G.degree(node))
	k_values = sorted(k_values)
	k = k_values[len(k_values)//2]
	# print clusterFile, k
	# print min(k_values)
	# print max(k_values)	
	subG = nx.k_core(G, k=k) # Returns subgraph
	# print len(G.nodes()), '\t', len(subG.nodes())
	# (the stray 'w' argument was dropped: the third positional parameter of
	# write_weighted_edgelist is the comments character, not a file mode)
	nx.write_weighted_edgelist(subG, outDirK + clusterFile)
Example #44
def init_full():
    global g, o, file_num
    
    randomize_states(g)
    
    for i,j in g.edges():
        g.edges[i, j]['weight'] = 0

    o = g.copy()
    for i,j in o.edges():
        if rd.random() < 0.1:
            o.edges[i, j]['weight'] = rd.choice([1,-1])
            
    nx.write_weighted_edgelist(g, 'run_%s_g_edgelist_%d.csv' % (args.runid, file_num))
    nx.write_weighted_edgelist(o, 'run_%s_o_edgelist_%d.csv' % (args.runid, file_num))
Example #45
def init_full():
    global g, o
    

    randomize_states(g)
    
    for i,j in g.edges():
        g.edges[i, j]['weight'] = 0

    o = g.copy()
    for i,j in o.edges():
        if rd.random() < 0.07:
            o.edges[i, j]['weight'] = rd.choice([1,-1])
            

    nx.write_weighted_edgelist(g, 'g_edgelist.csv')
    nx.write_weighted_edgelist(o, 'o_edgelist.csv')
Example #46
def main():
    infoname = 'sim_data_inferred.txt'
    priorname = 'sim_data_prior.txt'
    outname = 'updated.txt'
    outavgname = 'updated_avg.txt'
    
    G = update(infoname, priorname)
    for e in G.edges():
        print(G[e[0]][e[1]]['params'])
        print(e, stats.gamma(G[e[0]][e[1]]['params'][0], scale=1./G[e[0]][e[1]]['params'][1]).stats(moments='m'))
    nx.write_edgelist(G,outname)
    A = G.copy()
    for e in A.edges():
        p = stats.gamma(G[e[0]][e[1]]['params'][0], scale=1./G[e[0]][e[1]]['params'][1]).stats(moments='m')
#       if p == nan: A[e[0]][e[1]]['weight'] = 0
        A[e[0]][e[1]]['weight'] = p
    nx.write_weighted_edgelist(A,outavgname,delimiter=',')
Example #47
def data():
    global time, o, g, file_num
    UO = []
    nx.write_weighted_edgelist(g, 'run_%s_g_edgelist_end_%d.csv' % (args.runid, file_num))
    nx.write_weighted_edgelist(o, 'run_%s_o_edgelist_end_%d.csv' % (args.runid, file_num))
    GU = open('run_%s_gu_%d.txt' % (args.runid, file_num), 'w')
    gu = global_uo(o)
    GU.write(str(gu))
    GU.close()

    LU = open('run_%s_UO_%d.txt' % (args.runid, file_num), 'w')
    for i in o.nodes():
        UO.append(local_uo(i, o))
    lo_sum = sum(UO)
    LU.write(str(UO))
    LU.close()
    print(lo_sum)
Example #48
def grapheme_net(syllable_network, d="directed", w="weighted"):
    """Creates grapheme network.

    The structure of the grapheme network depends on an
    existing network of syllables.

    Two graphemes are linked if they co-occur as neighbours
    within a syllable.

    Parameters
    ----------
    syllable_network : edge list of a syllable network
    d : directed or undirected
        type of graph
    w : weighted or unweighted
        if weighted is selected then the weight of the link
        between two graphemes is proportional to the overall
        frequency of the corresponding graphemes
        co-occurring within syllables from the syllable network
    """
    if d == "directed":
        syllable_net = nx.read_weighted_edgelist(syllable_network, create_using=nx.DiGraph())
        g = nx.DiGraph()
    elif d == "undirected":
        syllable_net = nx.read_weighted_edgelist(syllable_network)
        g = nx.Graph()

    for node in syllable_net.nodes():
        graphemes = list(node)
        for i, gr in enumerate(graphemes):
            if i > 0:
                if w == "weighted":
                    if g.has_edge(graphemes[i - 1], graphemes[i]):
                        g[graphemes[i - 1]][graphemes[i]]['weight'] += 1
                    else:
                        g.add_edge(graphemes[i - 1], graphemes[i], weight=1)
                elif w == "unweighted":
                    g.add_edge(graphemes[i - 1], graphemes[i])

    if w == "unweighted":
        nx.write_edgelist(g, syllable_network.rsplit(".", 1)[0] + "_grapheme.edges")
    elif w == "weighted":
        nx.write_weighted_edgelist(g, syllable_network.rsplit(".", 1)[0] + "_grapheme.edges")

    return g
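A short usage sketch, chaining off the edge list written by syllable_net in Example #5 (path illustrative):

# Build the grapheme network from a previously written syllable edge list;
# this also writes corpus_syllable_grapheme.edges next to the input file.
g = grapheme_net('corpus_syllable.edges', d="directed", w="weighted")
print(g.number_of_edges())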
Example #49
    def save_graph(self, graphname):
        """
        Saves the graph to disk
        **Positional Arguments:**
                graphname:
                    - Filename for the graph
        """
        import numpy as np
        import networkx as nx
        if self.modal == 'dwi':
            self.g.graph['ecount'] = nx.number_of_edges(self.g)
            nx.write_weighted_edgelist(self.g, graphname, delimiter=",")

        elif self.modal == 'func':
            np.savetxt(graphname, self.g, comments='', delimiter=',',
                header=','.join([str(n) for n in self.n_ids]))
        else:
            raise ValueError("Unsupported Modality.")
Example #52
def wordlist_subnet(word_network, word, words_file, d="directed", w="weighted"):
    """Creates word-list network which is a simple subnetwork
    based on provided list of words.

    Parameters
    ----------
    word_network : edge list of the original network
    word : string
    words_file : file
        file containing words that will be extracted
        from the original network
    d : directed or undirected
        type of graph
    w : weighted or unweighted
        if weighted is selected then the weight of the link
        between two words is proportional to the overall
        frequency of the corresponding words' co-occurrence
        within the original network
    """
    with open(words_file, "r", encoding="utf-8") as f:
        word_list = f.read().splitlines()

    if word not in word_list:
        word_list.append(word)

    if d == "directed":
        word_net = nx.read_weighted_edgelist(word_network, create_using=nx.DiGraph())
        sg = nx.DiGraph(word_net.subgraph(word_list))
    elif d == "undirected":
        word_net = nx.read_weighted_edgelist(word_network)
        sg = nx.Graph(word_net.subgraph(word_list))

    if w == "unweighted":
        nx.write_edgelist(sg, word_network.rsplit(".", 1)[0] + "_wordlist_subnetwork.edges")
    elif w == "weighted":
        nx.write_weighted_edgelist(sg, word_network.rsplit(".", 1)[0] + "_wordlist_subnetwork.edges")

    return sg
Example #53
def getnx(self, ts ,save=False):
    '''
    Create a networkx graph from a DSMACC new class
    
    Usage: 
        getnx(a,a.ts[-1], 'propane')
    '''
    
    self.create_posjac()
    G = nx.DiGraph()
    
    posjac = self.posjac.loc[ts,:]
    split = [i.split('->') for i in posjac.index]
    
    for e in range(len(split)):
        G.add_edge(split[e][0],split[e][1],weight=posjac[e])
    G.remove_edges_from(nx.selfloop_edges(G))
    
    if save:
        nx.write_weighted_edgelist(G, save+'.wedgelist')
    #G=nx.read_weighted_edgelist('propane.wedgelist',create_using=nx.DiGraph)

    return G 
Example #54
def getnx(self, ts ,save=False,ignore=[] ):
    '''
    Create a networkx graph from a DSMACC new class
    
    Usage: 
        getnx(a,a.ts[-1], 'propane')
    '''
    try:
        self.posjac
    except AttributeError:
        self.create_posjac()
    
    G = nx.DiGraph()
    
    posjac = self.posjac.loc[ts,:]
    split = [i.split('->') for i in posjac.index]
    
    p = [i for i in posjac if i != 0 ]
    mn = np.min(p)
    mx = np.log10(np.max(p) - mn )
    mn = np.log10(mn)
    for e in range(len(split)):
        if posjac[e] > 0 :
            G.add_edge(split[e][0],split[e][1],weight=1e-4+(np.log10(posjac[e])-mn)/mx )
    G.remove_edges_from(nx.selfloop_edges(G))
    
    #no more zero concentration edges
    G = rm_nodes (G, set(G.nodes()) - set(self.spec.columns))
    G = rm_nodes (G, ignore)
    #rm isolates
    G = rm_nodes(G,list(nx.isolates(G)))

    
    
    if save:
        nx.write_weighted_edgelist(G, save+'.wedgelist')
    #G=nx.read_weighted_edgelist('propane.wedgelist',create_using=nx.DiGraph)

    return G 
Example #55
def load_infopath(fname):
    # Load infopath output lines ('rU' mode is gone in Python 3; use a context manager)
    with open(fname, 'r') as f:
        lines = [l for l in f]

    # Break lines into the node names and the edge attributes
    split = lines.index('\n')

    # Generate graph
    G = nx.DiGraph()
    node_names=[int(l.split(',')[0]) for l in lines[:split]]
    G.add_nodes_from(node_names)

    # Clean up edges
    edges_raw = [l.split(',') for l in lines[split+1:]]
#    edges_clean = [((int(l[0]),int(l[1])), np.mean(map(float, l[3::2]))) for l in edges_raw]
    edges_clean = [((int(l[0]),int(l[1])), float(l[-1])) for l in edges_raw]
    # Add edges to graph
    for e in edges_clean:
        if e[1]>0:
            G.add_edge(e[0][0],e[0][1], weight=e[1])
    nx.write_weighted_edgelist(G, fname[:-4]+'_avg.txt')

    return G
Example #57
def collect_comps(G, strongly, op, path):
    if strongly:
        cc_gen = nx.strongly_connected_components(G)
        ty = 'S'
    else:
        cc_gen = nx.weakly_connected_components(G)
        ty = 'W'
    if op == 1:
        ex.collect_alt_views(ex.gen_view(cc_gen), path + "%sCCsXCountView.txt" % ty, \
                             comments= "Vertex from %sCC; Count of vertex in %sCC" % (ty,ty))
    elif op == 2:
        # write raw trpl file of only vert in giant comp
        giantcc = cull_comps(G.copy(), cc_gen, True)
        fn = 'txTripletsCounts%sGiantOnly.txt' % ty
        print("Writing %s" % fn)
        nx.write_weighted_edgelist(giantcc, path + fn)
        nx.write_weighted_edgelist(giantcc, '../' + fn + '.gz')
    elif op == 3:
        # write raw trpl file of only vert not in giant comp
        giantcc = cull_comps(G.copy(), cc_gen, False)
        fn = 'txTripletsCountsNo%sGiant.txt' % ty
        nx.write_weighted_edgelist(giantcc,path + fn)
    return None
Example #58
def writeGraph(graph, filePath, weighted=True):
    # networkx edge list writers encode lines to bytes: open in binary mode
    with open(filePath, 'wb') as f:
        if not weighted:
            write_edgelist(graph, f)
        else:
            write_weighted_edgelist(graph, f)
Example #59
def syntax_net(corpus, d="directed", w="weighted"):
    with open(corpus, "r", encoding="utf-8") as f:
        lines = f.readlines()
        lines.append("")

    sentences = []
    current = []
    for l in lines:
        cleaned = l.strip()
        if len(cleaned) == 0:
            sentences.append(current)
            current = []
        else:
            current.append(tuple(cleaned.split("\t")))

    lines_parsed = sentences

    def extract(sentence):
        reduced = [(0, 0, "ROOT", "Z")] + [(int(w[0]), int(w[6]), w[1], w[4]) for w in sentence]
        return reduced

    def remove_special(sentence):
        def first_special(sent):
            for word in sent:
                if word[2] == "--" or word[2] == "-" or word[2] == "%":
                    continue
                if word[3] == "Z":
                    return word[0], word[1]
            return ()

        def rename(name):
            if name == "--" or name == "-":
                return "HYPHEN"
            elif name == "%":
                return "PERCENT"
            else:
                return name

        reduced = sentence
        to_replace = first_special(reduced)
        while to_replace:
            new_reduced = []
            is_first = True

            for word in reduced:
                if word[0] == to_replace[0]:
                    continue

                if word[1] == to_replace[0]:
                    if is_first:
                        is_first = False
                        parent = to_replace[1]
                        parent_rest = word[0]

                        if to_replace[0] == to_replace[1]:
                            parent = word[0]

                        new_reduced.append((word[0], parent, rename(word[2]), word[3]))
                    else:
                        new_reduced.append((word[0], parent_rest, rename(word[2]), word[3]))
                else:
                    new_reduced.append((word[0], word[1], rename(word[2]), word[3]))

            reduced = new_reduced
            to_replace = first_special(reduced)

        return reduced

    reduced_sentences = [remove_special(extract(sent)) for sent in lines_parsed]

    syntax_edges = dict()
    for sentence in reduced_sentences:
        name_map = dict()
        for word in sentence:
            name_map[word[0]] = word[2]

        for word in sentence:
            parent = word[1]
            current = word[0]
            edge = (name_map[parent], name_map[current])
            if edge in syntax_edges:
                syntax_edges[edge] += 1
            else:
                syntax_edges[edge] = 1

    syntax_list = [(k[0], k[1], v) for (k, v) in syntax_edges.items()]

    if d == "directed":
        g = nx.DiGraph()
    elif d == "undirected":
        g = nx.Graph()

    if w == "unweighted":
        g.add_edges_from(syntax_list)
        nx.write_edgelist(g, corpus.rsplit(".", 1)[0] + "_syntax.edges")
    elif w == "weighted":
        g.add_weighted_edges_from(syntax_list)
        nx.write_weighted_edgelist(g, corpus.rsplit(".", 1)[0] + "_syntax.edges")

    return g
Example #60
def cooccurrence_net(corpus, delimiter_list, d="directed",
                     w="weighted", window=1, lower="Yes"):
    """Creates co-occurrence network from text file.

    Links are established within a window between the
    first word and n-1 subsequent words.

    Parameters
    ----------
    corpus : file
        original text file from which the network
        will be created
    delimiter_list : list
        list of delimiters
    d : directed or undirected
        type of graph that will be created
    w : weighted or unweighted
        if weighted is selected then the weight of the link
        between two words is proportional to the overall
        frequency of the corresponding words' co-occurrence
        within the text
    window : int
        set of n subsequent words from a text
    lower : Yes or No
        defines whether all characters in the text will be
        lower-cased or not
    """
    with open(corpus, "r", encoding="utf-8") as f:
        if lower == "Yes":
            c_list = f.read().lower().split()
        elif lower == "No":
            c_list = f.read().split()

    if d == "directed":
        g = nx.DiGraph()
    elif d == "undirected":
        g = nx.Graph()

    delimiters = "".join(delimiter_list)

    if w == "unweighted":
        for i, word in enumerate(c_list):
            for j in range(1, window + 1):
                if i - j >= 0 and c_list[i - j][-1] not in delimiter_list:
                    g.add_edge(c_list[i - j], c_list[i].strip(delimiters))
                else:
                    break

        nx.write_edgelist(g, corpus.rsplit(".", 1)[0] + "_coocurrence.edges")

    elif w == "weighted":
        for i, word in enumerate(c_list):
            for j in range(1, window + 1):
                if i - j >= 0 and c_list[i - j][-1] not in delimiter_list:
                    if g.has_edge(c_list[i - j], c_list[i].strip(delimiters)):
                        g[c_list[i - j]][c_list[i].strip(delimiters)]['weight'] += 1
                    else:
                        g.add_edge(c_list[i - j], c_list[i].strip(delimiters), weight=1)
                else:
                    break

        nx.write_weighted_edgelist(g, corpus.rsplit(".", 1)[0] + "_coocurrence.edges")

    return g
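Finally, a hedged usage sketch for cooccurrence_net (the corpus path and delimiter list are illustrative):

# Weighted, directed co-occurrence network with a window of two words;
# also writes corpus_coocurrence.edges next to the input file.
g = cooccurrence_net('corpus.txt', ['.', ',', '!', '?'],
                     d="directed", w="weighted", window=2, lower="Yes")
print(g.number_of_nodes(), g.number_of_edges())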