def save_mat(conn_matrix, est_path, fmt='npy'):
    """
    Save a connectivity matrix to disk in the requested format.

    The matrix is converted to a NetworkX graph, the edge count is stored in
    ``G.graph['ecount']`` and the nodes are relabelled to consecutive integers
    starting at 1 before anything is written.

    Parameters
    ----------
    conn_matrix : array
        Adjacency matrix given as a 2-D array.
    est_path : str
        Output path, normally ending in ``.npy``. For every format except
        ``'npy'`` a trailing ``.npy`` is replaced by the extension that
        belongs to `fmt`.
    fmt : str
        One of ``'npy'`` (default), ``'txt'``, ``'gpickle'`` (``.pkl``),
        ``'graphml'``, ``'edgelist_csv'`` (``.csv``) or ``'edgelist_ssv'``
        (``.ssv``).

    Raises
    ------
    ValueError
        If `fmt` is not one of the supported values.
    """
    import networkx as nx

    G = nx.from_numpy_array(conn_matrix)
    G.graph['ecount'] = nx.number_of_edges(G)
    G = nx.convert_node_labels_to_integers(G, first_label=1)

    # Only strip a *trailing* '.npy'. The previous split('.npy')[0] cut the
    # path at the first occurrence, truncating e.g. 'run.npy_out/mat.npy'.
    if est_path.endswith('.npy'):
        base_path = est_path[:-len('.npy')]
    else:
        base_path = est_path

    if fmt == 'edgelist_csv':
        # NOTE(review): written with the default single-space delimiter even
        # though the extension is .csv -- confirm that consumers expect this.
        nx.write_weighted_edgelist(G, "%s%s" % (base_path, '.csv'), encoding='utf-8')
    elif fmt == 'gpickle':
        nx.write_gpickle(G, "%s%s" % (base_path, '.pkl'))
    elif fmt == 'graphml':
        nx.write_graphml(G, "%s%s" % (base_path, '.graphml'))
    elif fmt == 'txt':
        # to_numpy_array writes the same values as the removed to_numpy_matrix.
        np.savetxt("%s%s" % (base_path, '.txt'), nx.to_numpy_array(G))
    elif fmt == 'npy':
        np.save(est_path, nx.to_numpy_array(G))
    elif fmt == 'edgelist_ssv':
        nx.write_weighted_edgelist(G, "%s%s" % (base_path, '.ssv'), delimiter=" ", encoding='utf-8')
    else:
        raise ValueError('\nERROR: File format not supported!')
    return
def main():
    """Load a weighted edge list, analyse it and write the selected subgraph.

    Command-line arguments:
      edgelist               input edge list (comma separated), optional
      outfile                output path; defaults to the input path with
                             '.prob' removed and '.analysis.tmp' appended
      -t/--interconnectivity threshold handed to ``analyse``
      -d/--density           parsed but not used by this function
      -m/--min-edge          minimum edge weight handed to ``enforce_min``
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('edgelist', nargs='?', default='analysis/combined_716eef6.prob')
    parser.add_argument('outfile', nargs='?')
    # type=float: without it a value given on the command line arrives as a
    # string while the defaults are floats, so the thresholds changed type
    # depending on how the script was invoked.
    parser.add_argument('-t', '--interconnectivity', type=float, default=0.80)
    parser.add_argument('-d', '--density', type=float, default=0.80)
    parser.add_argument('-m', '--min-edge', type=float, default=0.20)
    args = parser.parse_args()

    if args.outfile is None:
        args.outfile = args.edgelist.replace('.prob', '') + '.analysis.tmp'

    threshold_min_weight = args.min_edge
    threshold_interconnectivity = args.interconnectivity

    print_err("Loading graph")
    # The reader consumes the line iterator completely before returning, so
    # the file can be closed as soon as the graph has been built.
    with open(args.edgelist, 'rb') as edge_file:
        G_sim = nx.read_weighted_edgelist(
            enforce_min(skip_comments(edge_file), threshold_min_weight),
            nodetype=int, delimiter=',')
    print_err('Loaded (V={:}, E={:})'.format(len(G_sim), G_sim.size()))

    cc = analyse(G_sim, threshold_interconnectivity)
    nx.write_weighted_edgelist(G_sim.subgraph(cc), args.outfile)
def create_tag_tag_graph_weighted():
    """Build and save a weighted tag-to-tag graph from question posts.

    Posts are selected when AcceptedAnswerId, ViewCount and OwnerUserId are
    all non-null. Each distinct tag receives an integer id in order of first
    appearance. Tags that are adjacent in a post's tag string are linked, and
    the edge weight counts how often that adjacent pair occurs. The graph is
    written to 'Data/T_T_Weighted.edgelist' and the tag-to-id mapping to
    'Tag_To_Node_Weighted' inside DATA_DIR.
    """
    rows = list(
        QuestionPosts.select(QuestionPosts.Tags)
        .where(QuestionPosts.AcceptedAnswerId.is_null(False),
               QuestionPosts.ViewCount.is_null(False),
               QuestionPosts.OwnerUserId.is_null(False))
        .dicts())

    # Tags are stored like '<a><b>'; splitting on either bracket and dropping
    # the empty pieces leaves the bare tag names.
    tag_lists = [[piece for piece in re.split('<|>', row['Tags']) if piece]
                 for row in rows]

    # First pass: assign ids in order of first appearance.
    tag_to_int = {}
    for tag_list in tag_lists:
        for tag in tag_list:
            tag_to_int.setdefault(tag, len(tag_to_int))

    # Second pass: count adjacent tag pairs.
    t_t = nx.Graph()
    for tag_list in tag_lists:
        for left, right in zip(tag_list, tag_list[1:]):
            u, v = tag_to_int[left], tag_to_int[right]
            if t_t.has_edge(u, v):
                t_t[u][v]['weight'] += 1
            else:
                t_t.add_edge(u, v, weight=1)

    nx.write_weighted_edgelist(t_t, 'Data/T_T_Weighted.edgelist')

    with open(os.path.join(DATA_DIR, 'Tag_To_Node_Weighted'), 'w') as f:
        for tag, node_id in tag_to_int.items():
            f.write(tag + ',' + str(node_id) + "\n")
def save_graph( ego, G ):
    """Write graph ``G`` of the current ego to '<output_dir><ego>.edge_list'.

    Receives the id of the current ego and its graph (edge list). The graph is
    emptied after it has been written.
    """
    edge_list_path = output_dir + str(ego) + ".edge_list"
    with open(edge_list_path, 'wb') as graph:
        # Write the edge list WITH weights.
        nx.write_weighted_edgelist(G, graph)
    G.clear()
def syllable_net(corpus, syllable_list, d="directed", w="weighted"):
    """Creates syllable network from co-occurrence of syllables within words.

    Parameters
    ----------
    corpus : file
        original text file from which the network will be created; every line
        is split on tabs and its second field is used as the word
    syllable_list : file
        whitespace-separated lines whose first field is a word and whose
        second field is that word split into syllables with '-'
    d : directed or undirected
        type of graph
    w : weighted or unweighted
        if weighted is selected than the weight of the link between two
        syllables will be proportional to the overall frequencies of the
        corresponding syllables co-occurrence within words from a text

    Returns
    -------
    The created graph. The edge list is also written next to `corpus` as
    '<corpus without extension>_syllable.edges'.

    Raises
    ------
    ValueError
        If `d` is neither "directed" nor "undirected".
    """
    # Fail fast: an unknown `d` used to leave `g` unbound and only surfaced
    # later as a NameError.
    if d == "directed":
        g = nx.DiGraph()
    elif d == "undirected":
        g = nx.Graph()
    else:
        raise ValueError("d must be 'directed' or 'undirected', got %r" % (d,))

    with open(corpus, "r", encoding="utf-8") as f:
        words = [line.split("\t") for line in f]

    with open(syllable_list, "r", encoding="utf-8") as f:
        syllables = [line.split() for line in f]

    syllable_edges = dict()

    for i in words:
        if len(i) > 1:
            for j in syllables:
                if i[1] == j[0]:
                    parts = j[1].split("-")
                    # consecutive syllables of the word form an edge
                    for l, r in zip(parts[:-1], parts[1:]):
                        edge = (l, r)
                        syllable_edges[edge] = syllable_edges.get(edge, 0) + 1
                    # NOTE(review): the original had a dangling `else:` here
                    # whose nesting was lost with the formatting. It is read
                    # as the `else` of the loop above, which never breaks, so
                    # the word is added as a node for every match. Confirm
                    # that this was the intended nesting.
                    g.add_node(i[1])

    edge_list = [(k[0], k[1], v) for (k, v) in syllable_edges.items()]

    if w == "unweighted":
        g.add_edges_from(edge_list)
        nx.write_edgelist(g, corpus.rsplit(".", 1)[0] + "_syllable.edges")
    elif w == "weighted":
        g.add_weighted_edges_from(edge_list)
        nx.write_weighted_edgelist(
            g, corpus.rsplit(".", 1)[0] + "_syllable.edges")

    return g
def main(n_start, n_count=1, n_inc=1, c_in_start=10, c_in_count=1, c_in_inc=1, c_out_start=5, c_out_count=1, c_out_inc=1, comm_count=2, DC=False, i=0):
    """Generate, fuzz and cluster networks over a grid of parameters.

    For every (n, c_in, c_out) combination produced by ``custom_range`` a
    network is made, written to 'data/', fuzzed, written again, and clustered
    by the external 'src/bp' program. One result line per combination is
    appended to 'out/results.txt'. ``DC`` is accepted but not used here.
    """
    bp_uncertain = 'src/bp'
    edge_frac = 1.
    nonedge_mult = 5.
    b = 2
    trials = 2

    for folder in ('out', 'data'):
        os.makedirs(folder, exist_ok=True)

    for n in custom_range(n_start, n_count, n_inc):
        for c_in in custom_range(c_in_start, c_in_count, c_in_inc):
            for c_out in custom_range(c_out_start, c_out_count, c_out_inc):
                # the same parameter tuple names all three files of this run
                run_params = (n, c_in, c_out, b, edge_frac, nonedge_mult, i)
                original_net = 'data/original_net-%d-%f-%f-%f-%f-%f-%d.edges' % run_params
                uncertain_net = 'data/noisy_net-%d-%f-%f-%f-%f-%f-%d.edges' % run_params
                uncertain_comms = 'out/uncertain_comms-%d-%f-%f-%f-%f-%f-%d.out' % run_params

                print("making and fuzzing network")
                G_orig = make_net(c_in, c_out, n)
                write_edgelist(G_orig, original_net)
                G, _ = fuzz_network(G_orig, 1, b, edge_frac, nonedge_mult)
                write_weighted_edgelist(G, uncertain_net)

                command = '%s -i %s -o %s -c %d -l %d -n %d' % (
                    bp_uncertain, uncertain_net, uncertain_comms, comm_count, 3, trials)
                start1 = time()
                print("running belief propagation")
                os.system(command)
                end1 = time()

                with open('out/results.txt', 'a+') as out_file:
                    result_line = "%d %f %f\t%f %f %f\t %f %f\t %s %d\n" % (
                        n, c_in, c_out, b, edge_frac, nonedge_mult,
                        evaluate(uncertain_comms, n), end1 - start1,
                        str(datetime.now()), i)
                    out_file.write(result_line)
def graph_generator(n, m):
    """
    Generate a random directed, weighted Barabasi-Albert graph and save it.

    n is the number of nodes and m is the number of edges connected to each
    new node. Every directed edge gets a random integer weight in [1, 20].
    The graph is drawn, written to 'test.weighted.edgelist', and that file is
    then prefixed with a line 'A' and a line holding the node count.

    Returns the graph, or None (after printing a message) when the arguments
    do not satisfy 1 <= m < n.
    """
    if m < 1 or m >= n:
        # The original format string had no placeholders, so this line raised
        # TypeError instead of reporting the bad arguments.
        print("Network must have m>=1 and m<n, m=%d,n=%d" % (m, n))
        return None

    G = nx.barabasi_albert_graph(n, m, seed=24)
    G = G.to_directed()
    for (u, v, w) in G.edges(data=True):
        w['weight'] = random.randint(1, 20)

    # Edges are drawn by default; `with_edges` is not a draw() keyword and is
    # rejected by recent NetworkX versions, so it is no longer passed.
    nx.draw(G, with_labels=True)
    plt.axis('off')
    plt.show()

    nx.write_weighted_edgelist(G, 'test.weighted.edgelist')
    # Prepend the header by rewriting the file from the start.
    with open('test.weighted.edgelist', 'r+') as f:
        content = f.read()
        f.seek(0, 0)
        f.write('A' + '\n' + str(G.order()) + '\n' + content)

    import os
    os.system('cls')
    print(
        "A graph with nodes = ", n, " and edges = ", G.number_of_edges(),
        " has been downloaded to file with the name test.weighted.edgelist as arc matrics"
    )
    return G
def step():
    """Advance the simulation by one tick.

    Increments the global ``time``. When ``pert_accu`` has reached
    ``perturbation_period`` the accumulator is reset, ``T`` is incremented and
    recorded in ``T_list``, ``global_uo(o)`` is recorded in ``U_plot`` and the
    states of ``o`` are randomized; ``learning()`` is called first when
    600 < T < 3000. Otherwise the accumulator is incremented. A random node of
    ``o`` is then passed to ``node_state``. At time 3599998 both graphs are
    written out as weighted edge lists.

    NOTE(review): the source formatting was lost. The nesting below (node
    update on every tick, final dump at function level) is the most plausible
    reading -- confirm against the original file.
    """
    global time, o, g, T, perturbation_period, pert_accu, g_ut
    time +=1
    if pert_accu == perturbation_period:
        # learning only happens inside this window of T
        if T > 600 and T < 3000:
            learning()
        pert_accu = 0
        T += 1
        T_list.append( T )
        U_plot.append( global_uo(o) )
        randomize_states(o)
    else:
        pert_accu += 1
    # update the state of one randomly chosen node
    i = rd.choice(o.nodes())
    node_state(i)
    if time == 3599998:
        # shorter alternative kept from the original: if time == 3598:
        nx.write_weighted_edgelist(g, 'g_edgelist_end.csv')
        nx.write_weighted_edgelist(o, 'o_edgelist_end.csv')
def combine_data_get_sp_paths_costs_activated(G_unweighted, SI, response_nw_fname, paths):
    """Build the activated response network and return path costs.

    The response network is derived from the relevant part of ``SI`` and
    ``G_unweighted``. Entries of ``SI`` whose index is not a node of that
    network are dropped.

    With an empty ``paths`` (actual data) the network is written to
    ``response_nw_fname`` as a tab-separated weighted edge list and all-pairs
    shortest-path costs are computed. With a non-empty ``paths`` (randomized
    data) only the costs of those given paths are computed.

    Returns the tuple ``(Pij, SI)`` with the costs and the filtered ``SI``.
    """
    relevant_si = mic_fun.get_relevant_SI(SI)

    # Map gene expression values onto the unweighted network.
    response_net = net_fun.get_activated_response_network(
        relevant_si, G_unweighted)
    print("Got response network with ", len(response_net.nodes()),
          " nodes and ", len(response_net.edges()), " edges")

    # Drop SI values for genes which don't map to the response network.
    unmapped_genes = set(SI.index) - set(response_net.nodes())
    SI = SI.drop(unmapped_genes)

    if len(paths) > 0:
        # Randomized data: cost of the same paths that are the shortest in
        # the actual dataset.
        Pij = net_fun.get_costs_of_given_paths(response_net, paths)
        print("Got cost of paths which are shortest in the actual data")
    else:
        # Actual data: persist the network, then compute all-pairs costs
        # (per the original note, a pandas dataframe indexed by 'src#dest').
        nx.write_weighted_edgelist(response_net, response_nw_fname,
                                   delimiter='\t')
        Pij = net_fun.get_all_sp_paths_costs(response_net)
        print("Got shortest path costs for ", Pij.shape[0], " node-pairs")

    return Pij, SI
def save_graph(self, graphname, fmt='edgelist'):
    """
    Saves the graph to disk.

    The edge count is first stored in ``self.g.graph['ecount']`` and a copy of
    the graph with node labels renumbered from 1 is what gets written.

    **Positional Arguments:**

            graphname:
                - Filename for the graph

    **Optional Arguments:**

            fmt:
                - Output graph format: 'edgelist', 'gpickle' or 'graphml'

    Raises ValueError for any other format.
    """
    self.g.graph['ecount'] = nx.number_of_edges(self.g)
    relabelled = nx.convert_node_labels_to_integers(self.g, first_label=1)

    if fmt == 'edgelist':
        nx.write_weighted_edgelist(relabelled, graphname, encoding='utf-8')
        return
    if fmt == 'gpickle':
        nx.write_gpickle(relabelled, graphname)
        return
    if fmt == 'graphml':
        nx.write_graphml(relabelled, graphname)
        return
    raise ValueError(
        'edgelist, gpickle, and graphml currently supported')
def write_export(output_directory, export_ref_annotated_format, span, graph):
    """Write the annotated graph of ``span`` in the requested format.

    The file is created inside ``output_directory`` (created when missing) as
    '<span>_annotated.<ext>'. Supported formats are "gexf", "edgelist"
    (tab-separated weighted edge list with a .csv extension), "pajek" and
    "graphml". Any other value only produces a log message.
    """
    if not os.path.exists(output_directory):
        # makedirs also creates missing parent directories
        os.makedirs(output_directory)

    if export_ref_annotated_format == "gexf":
        log("write gexf export", span)
        networkx.write_gexf(
            graph, os.path.join(output_directory, "%s_annotated.gexf" % span))
    elif export_ref_annotated_format == "edgelist":
        log("write csv export", span)
        networkx.write_weighted_edgelist(
            graph,
            os.path.join(output_directory, "%s_annotated.csv" % span),
            delimiter="\t")
    elif export_ref_annotated_format == "pajek":
        log("write pajek export", span)
        networkx.write_pajek(
            graph, os.path.join(output_directory, "%s_annotated.net" % span))
    elif export_ref_annotated_format == "graphml":
        # message fixed: this branch used to log "write pajek export"
        log("write graphml export", span)
        networkx.write_graphml(
            graph,
            os.path.join(output_directory, "%s_annotated.graphml" % span))
    else:
        log("no compatible export format specified", span)
def cleanseWeightedEdgeData(csvfile, cleansedcsvfile, ctyarray):
    """
    Remove nodes that are not known UN countries from a weighted edge list.

    There are errors in data pertaining to UN country nodes. These nodes may
    not exist either due to spelling or they refer to regional entities. Such
    nodes are detected and removed, and the remaining edges are written as a
    comma-separated file.

    :param csvfile: path of the comma-separated weighted edge list to read
    :param cleansedcsvfile: path of the cleansed CSV file to write
    :param ctyarray: container of UN countries, looked up by node name; a
        failed lookup marks the node as unknown
    :return: None
    """
    import os

    cleansedfilewithspace = 'temp' + str(random.random()) + '.edgelist'
    graph = nx.read_weighted_edgelist(csvfile, delimiter=',', create_using=nx.DiGraph())

    # Iterate over a snapshot: removing nodes while walking the live node
    # container fails once nodes() is a view rather than a list.
    for n in list(graph.nodes()):
        try:
            ctyarray[n]
        except (LookupError, TypeError):
            # same text as the old two-item print statement
            print("%s %s" % ("This node doesn't exists in country list...dropping node ", n))
            graph.remove_node(n)

    edgelist = []
    try:
        nx.write_weighted_edgelist(graph, cleansedfilewithspace)
        with open(cleansedfilewithspace, 'r') as spaced_file:
            for row in csv.reader(spaced_file, delimiter=' '):
                edgelist.append([row[0], row[1], row[2]])
    finally:
        # the intermediate file used to be left behind on every call
        if os.path.exists(cleansedfilewithspace):
            os.remove(cleansedfilewithspace)

    # 'wb' is the Python 2 convention for csv output, kept as in the original.
    with open(cleansedcsvfile, 'wb') as out_file:
        edgewriter = csv.writer(out_file, delimiter=',')
        for item in edgelist:
            edgewriter.writerow([item[0], item[1], item[2]])
def write_output(sdp_results, outfname, subgraphfile):
    """Write summary statistics of the result subgraph and the subgraph itself.

    ``sdp_results`` is the triple ``(S, obj, obj_rounded)``. A one-row CSV
    with size, edge count, density, diameter ('inf' when disconnected),
    triangle density, rounded objective and objective is written to
    ``outfname``; ``S`` is written to ``subgraphfile`` as a weighted edge
    list. Density uses the summed edge weights when the first edge carries a
    'weight' attribute and the plain edge count otherwise.
    """
    S, obj, obj_rounded = sdp_results
    # same text as the old three-item print statement
    print("%s %s %s" % ("Returning subgraph with OQC score", obj_rounded, "(%s)" % obj))

    n = len(S)

    # A graph without edges used to raise StopIteration here, which made the
    # empty-subgraph branch below unreachable.
    first_edge = next(iter(S.edges(data=True)), None)
    if first_edge is not None and 'weight' in first_edge[2]:
        e = sum(data['weight'] for u, v, data in S.edges(data=True))
    else:
        e = S.number_of_edges()

    header = "|S|,|E|,density,diameter,triangle density,OQC,obj\n"
    with open(outfname, 'w') as f:
        f.write(header)
        if n > 0:
            density = 2. * e / (n * (n - 1)) if n > 1 else 0
            diameter = nx.diameter(S) if nx.is_connected(S) else 'inf'
            if n > 2:
                triangle_density = (2. * sum(nx.triangles(S).values())
                                    / (n * (n - 1) * (n - 2)))
            else:
                triangle_density = 0
            row = [n, S.number_of_edges(), density, diameter,
                   triangle_density, obj_rounded, obj]
            f.write(','.join(str(value) for value in row) + '\n')
        else:
            # NOTE(review): this row has eight fields but the header has
            # seven -- confirm which is intended before changing it.
            f.write("0,0,0,0,0,0,0,")
            f.write("%s\n" % obj)

    nx.write_weighted_edgelist(S, subgraphfile)
def export(graph, span):
    """Export ``graph`` for ``span`` in the format set by CONFIG["export_ref_format"].

    Files go to ``CONFIG["parsed_data"]/<span>/<span>.<ext>``. Supported
    formats are "gexf", "edgelist" (tab-separated weighted edge list with a
    .csv extension), "pajek" and "json" (node-link data). Any other value
    only prints a message.
    """
    export_format = CONFIG["export_ref_format"]
    span_dir = os.path.join(CONFIG["parsed_data"], span)

    if export_format == "gexf":
        print("Writing .gexf export")
        networkx.write_gexf(graph, os.path.join(span_dir, "%s.gexf" % span), encoding="UTF-8")
    elif export_format == "edgelist":
        print("Writing .csv export")
        networkx.write_weighted_edgelist(graph, os.path.join(span_dir, "%s.csv" % span), delimiter="\t")
    elif export_format == "pajek":
        print("Writing .pajek export")
        networkx.write_pajek(graph, os.path.join(span_dir, "%s.net" % span), encoding='UTF-8')
    elif export_format == "json":
        print("Writing .json export")
        data = json_graph.node_link_data(graph)
        # The handle used to be opened inline and never closed. The
        # `encoding` keyword is dropped: it is rejected by Python 3's
        # json.dump and UTF-8 was already the Python 2 default.
        with open(os.path.join(span_dir, "%s.json" % span), "w") as json_file:
            json.dump(data, json_file)
    else:
        print("No export compatible with the specified export format!")
def MCLAlgorithm(self, inflation=3.3):
    """
    Cluster the graph with the MCL algorithm.

    @param inflation: value of the MCL inflation coefficient
    @requires: the MCL program on the executable path
    @rtype: list
    @return: list of lists holding the members of each group
    """
    # NOTE(review): input and output use fixed names in /tmp, so concurrent
    # runs would overwrite each other's files.
    try:
        nx.write_weighted_edgelist(self.graph, "/tmp/mcl-input", delimiter="\t")
    except Exception:
        # best-effort fallback to a plain edge list; no longer a bare except,
        # so KeyboardInterrupt/SystemExit are not swallowed
        nx.write_edgelist(self.graph, "/tmp/mcl-input", delimiter="\t")

    import os
    logger.debug("Invoking mcl command ...")
    os.system("mcl /tmp/mcl-input --abc -te 2 -I %f -o /tmp/mcl-output" % inflation)
    logger.debug("MCL clustering done")

    # One output line per cluster; str.split and the list comprehension give
    # real lists on both Python 2 and Python 3.
    with open("/tmp/mcl-output", 'r') as out_file:
        partition = [[int(member) for member in line.split()]
                     for line in out_file]
    return partition
def init_minimal():
    """Create the two global graphs of a run and write them to disk.

    ``g`` is a complete graph on ``args.nodes`` nodes in which every node has
    attribute 's' = 1 and every edge has weight 0. ``o`` starts as a copy of
    ``g``; each of its edges is then, with probability 0.07, given a weight
    drawn from {1, -1}. Both graphs are written as weighted edge lists named
    after the run id and ``file_num``.
    """
    global g, o, file_num

    g = nx.complete_graph(args.nodes)
    for node in g.nodes():
        g.node[node]['s'] = 1
    for u, v in g.edges():
        g.edge[u][v]['weight'] = 0

    o = g.copy()
    # sparse random +/-1 couplings on top of the all-zero copy
    for u, v in o.edges():
        if rd.random() < 0.07:
            o.edge[u][v]['weight'] = rd.choice([1,-1])

    g_path = 'run_%s_g_edgelist_%d.csv' % (args.runid, file_num)
    nx.write_weighted_edgelist(g, g_path)
    o_path = 'run_%s_o_edgelist_%d.csv' % (args.runid, file_num)
    nx.write_weighted_edgelist(o, o_path)
def saveWeightedGraphAsCSV(graph, tocsvfile):
    """
    Save NetworkX weighted graph object into an edge file (CSV).

    NetworkX saves it as a file with spaces, which cannot be properly
    processed by Excel for further manipulation, so the space-separated file
    is converted into a proper comma-separated one.

    :param graph: weighted NetworkX graph
    :param tocsvfile: path of the CSV file to write
    :return: None
    """
    import os

    cleansedfilewithspace = 'temp' + str(random.random()) + '.edgelist'

    # list() works whether nodes() returns a list or a view; an empty graph
    # no longer raises IndexError here.
    nodes = list(graph.nodes())
    first_node = nodes[0] if nodes else None
    # same text as the old four-item print statement
    print("%s %s %s %s" % ("Size of this subgraph: ", len(nodes),
                           " First node is: ", first_node))

    edgelist = []
    try:
        nx.write_weighted_edgelist(graph, path=cleansedfilewithspace)
        with open(cleansedfilewithspace, 'r') as spaced_file:
            for row in csv.reader(spaced_file, delimiter=' '):
                edgelist.append([row[0], row[1], row[2]])
    finally:
        # the intermediate file used to be left behind on every call
        if os.path.exists(cleansedfilewithspace):
            os.remove(cleansedfilewithspace)

    # 'wb' is the Python 2 convention for csv output, kept as in the original.
    with open(tocsvfile, 'wb') as csvfile:
        edgewriter = csv.writer(csvfile, delimiter=',')
        for item in edgelist:
            edgewriter.writerow([item[0], item[1], item[2]])
def save_graph(self, graphname, fmt='edgelist'):
    """
    Saves the graph to disk.

    The edge count is first stored in ``self.g.graph['ecount']`` and a copy of
    the graph with node labels renumbered from 1 is what gets written.

    **Positional Arguments:**

            graphname:
                - Filename for the graph

    **Optional Arguments:**

            fmt:
                - Output graph format: 'edgelist', 'gpickle' or 'graphml'

    Raises ValueError for any other format.
    """
    self.g.graph['ecount'] = nx.number_of_edges(self.g)
    relabelled = nx.convert_node_labels_to_integers(self.g, first_label=1)

    if fmt == 'edgelist':
        nx.write_weighted_edgelist(relabelled, graphname, encoding='utf-8')
        return
    if fmt == 'gpickle':
        nx.write_gpickle(relabelled, graphname)
        return
    if fmt == 'graphml':
        nx.write_graphml(relabelled, graphname)
        return
    raise ValueError('edgelist, gpickle, and graphml currently supported')
def save_mat(conn_matrix, est_path, fmt='npy'):
    """
    Save a connectivity matrix to disk in the requested format.

    The matrix is converted to a NetworkX graph, the edge count is stored in
    ``G.graph['ecount']`` and the nodes are relabelled to consecutive integers
    starting at 1 before anything is written.

    :param conn_matrix: adjacency matrix given as a 2-D array
    :param est_path: output path, normally ending in ``.npy``; for every
        format except ``'npy'`` a trailing ``.npy`` is replaced by the
        extension that belongs to ``fmt``
    :param fmt: one of ``'npy'`` (default), ``'txt'``, ``'gpickle'``
        (``.pkl``), ``'graphml'``, ``'edgelist_csv'`` (``.csv``) or
        ``'edgelist_ssv'`` (``.ssv``)
    :return: None
    :raises ValueError: if ``fmt`` is not one of the supported values
    """
    import networkx as nx

    G = nx.from_numpy_array(conn_matrix)
    G.graph['ecount'] = nx.number_of_edges(G)
    G = nx.convert_node_labels_to_integers(G, first_label=1)

    # Only strip a *trailing* '.npy'. The previous split('.npy')[0] cut the
    # path at the first occurrence, truncating e.g. 'run.npy_out/mat.npy'.
    if est_path.endswith('.npy'):
        base_path = est_path[:-len('.npy')]
    else:
        base_path = est_path

    if fmt == 'edgelist_csv':
        # NOTE(review): written with the default single-space delimiter even
        # though the extension is .csv -- confirm that consumers expect this.
        nx.write_weighted_edgelist(G, "%s%s" % (base_path, '.csv'), encoding='utf-8')
    elif fmt == 'gpickle':
        nx.write_gpickle(G, "%s%s" % (base_path, '.pkl'))
    elif fmt == 'graphml':
        nx.write_graphml(G, "%s%s" % (base_path, '.graphml'))
    elif fmt == 'txt':
        # to_numpy_array writes the same values as the removed to_numpy_matrix.
        np.savetxt("%s%s" % (base_path, '.txt'), nx.to_numpy_array(G))
    elif fmt == 'npy':
        np.save(est_path, nx.to_numpy_array(G))
    elif fmt == 'edgelist_ssv':
        nx.write_weighted_edgelist(G, "%s%s" % (base_path, '.ssv'), delimiter=" ", encoding='utf-8')
    else:
        raise ValueError('\nERROR: File format not supported!')
    return
def clean(target, verbose=False, **kwargs):
    """Cleans the target dataset by making the graph undirected and connected.

    ``target`` is a key of GRAPHS, or "all" to clean every known dataset.
    For "PPI" the node-link JSON graph is split into its connected components
    with more than 100 nodes; each component gets its own edge list, class
    and feature file. For any other target the edge list is reduced to its
    largest connected component and rewritten in place, and a second file
    with the suffix '_weighted' is written in which edges lacking a weight
    get weight 1.

    ``verbose`` and ``kwargs`` are accepted but not used in this function.
    Raises ValueError for an unknown target.
    """
    if target == "all":
        # NOTE(review): verbose/kwargs are not forwarded to the recursive call
        for t in GRAPHS.keys():
            clean(t)
        return
    if target not in GRAPHS.keys():
        raise ValueError("Unknown target.")
    print("Cleaning dataset {}...".format(target))
    target_dir = os.path.join(GRAPH_DIR, target)
    edgelist_filename = os.path.join(target_dir, GRAPHS[target]["edgelist"])
    basename, ext = os.path.splitext(edgelist_filename)
    weighted_edgelist_filename = "{}_weighted{}".format(basename, ext)
    if target == "PPI":
        with open(edgelist_filename, 'r') as f:
            G = nx.readwrite.json_graph.node_link_graph(json.load(f))
        # components with more than 100 nodes, smallest first
        M = sorted([a for a in nx.connected_component_subgraphs(G) if len(a) > 100], key=len)
        # note: `f` is reused here for the feature table
        f = pd.DataFrame(np.load(os.path.join(target_dir, "ppi/ppi/ppi-feats.npy")))
        c = pd.read_json(os.path.join(target_dir, "ppi/ppi/ppi-class_map.json")).T
        for i, a in enumerate(M):
            nx.write_edgelist(a, os.path.join(target_dir, "ppi_{:02}.edgelist".format(i+1)))
            nodes = [n for n in a.nodes()]
            # rows of the feature/class tables that belong to this component
            fi = f.loc[nodes, :]
            ci = c.loc[nodes, :]
            ci.to_json(os.path.join(target_dir, "ppi_{:02d}.classes".format(i+1)))
            fi.to_json(os.path.join(target_dir, "ppi_{:02d}.features".format(i+1)))
    else:
        G = nx.read_edgelist(edgelist_filename, nodetype=int)
        # keep only the largest connected component
        G = max(nx.connected_component_subgraphs(G), key=len)
        nx.write_edgelist(G, edgelist_filename)
        # NOTE(review): the source formatting was lost; the weighting and the
        # weighted write are assumed to belong to this branch only (for PPI
        # the derived file name would end in the JSON extension). Confirm.
        for _, _, d in G.edges(data=True):
            if "weight" not in d:
                d["weight"] = 1
        nx.write_weighted_edgelist(G, weighted_edgelist_filename)
def generate_failure_network2():
    """Write a sequence of networks with progressively more nodes removed.

    Starting from 'edgelist/ninux/0', ten rounds are run. In each round the
    nodes are ranked by betweenness centrality (highest first) and the first
    node whose removal leaves the graph connected is removed; its rank is
    printed. If no such node exists, the last node examined is removed.
    After each round the current graph is written four times, each time both
    to 'edgelist/testdata2/<index>' and to the mirrored index 79 - index.
    """
    g = nx.read_weighted_edgelist('edgelist/ninux/0', nodetype=int)
    index = 0
    for _round in range(10):
        random.seed(1234)
        ranked = sorted(nx.betweenness_centrality(g).items(),
                        key=lambda item: item[1], reverse=True)

        to_rem = 0
        for rank, entry in enumerate(ranked):
            candidate = entry[0]
            trial = g.copy()
            to_rem = candidate
            trial.remove_node(candidate)
            if nx.is_connected(trial):
                print(rank)
                break
        g.remove_node(to_rem)

        for _repeat in range(4):
            nx.write_weighted_edgelist(g, 'edgelist/testdata2/' + str(index))
            nx.write_weighted_edgelist(
                g, 'edgelist/testdata2/' + str(40 * 2 - 1 - index))
            index += 1
def weight_preserving_configuration_model(G, filename=' '):
    """Return a randomized graph with G's degree sequence and shuffled weights.

    A configuration-model graph is built from the degree sequence of ``G``,
    collapsed to a simple graph, and stripped of self-loops. The 'weight'
    values of ``G``'s edges are shuffled and assigned to the new edges in
    iteration order. When ``filename`` is not the single-space default, the
    result is also written there as a weighted edge list.

    Raises IndexError if the randomized graph has more edges than ``G`` has
    weighted edges.
    """
    import random as rn
    import time

    # list(): the values must be a real sequence to be shuffled and indexed
    # (a dict view cannot be shuffled on Python 3).
    weight_sequence = list(nx.get_edge_attributes(G, 'weight').values())
    # dict(...) accepts both the old dict result and the newer degree view
    degree_sequence = list(dict(nx.degree(G)).values())

    rn.seed(rn.randint(0, 1000000) + time.time())

    E = nx.Graph(nx.configuration_model(degree_sequence))
    # collect the self-loops first, then remove them
    E.remove_edges_from([(u, v) for u, v in E.edges() if u == v])

    for _ in range(100):
        rn.shuffle(weight_sequence)

    # Index instead of re-slicing the list per edge (the slicing was O(n^2)).
    for position, (u, v) in enumerate(E.edges()):
        E[u][v]['weight'] = weight_sequence[position]

    if filename != ' ':
        nx.write_weighted_edgelist(E, filename, delimiter=' ', encoding='utf-8')
        print('Randomized edgelist dumped to ' + filename)

    return E
def save_graph(G, output_path, delimiter=',', write_stats=True, write_weights=False, write_dir=True):
    r"""
    Saves a graph to a file as an edgelist or weighted edgelist.

    If the stats parameter is set to True the file will include several lines
    containing the same basic graph statistics as provided by the get_stats
    function. For undirected graphs, the method stores both directions of
    every edge unless `write_dir` is False.

    Parameters
    ----------
    G : graph
        A NetworkX graph
    output_path : string
        Name of the file to write. The file is opened in append mode, so the
        edges follow whatever it already contains (e.g. the statistics).
        The value is passed to ``open``, so it must be a path rather than an
        already opened file object.
    delimiter : string, optional
        The string used to separate values. Default is ','.
    write_stats : bool, optional
        Sets if graph statistics should be added to the edgelist or not.
        Default is True.
    write_weights : bool, optional
        If True data will be stored as weighted edgelist (e.g. triplets src,
        dst, weight) otherwise as normal edgelist. If the graph edges have no
        weight attribute and this parameter is set to True, a weight of 1
        will be assigned to each edge. Default is False.
    write_dir : bool, optional
        This option is only relevant for undirected graphs. If False, the
        graph will be stored with a single direction of the edges. If True,
        both directions of edges will be stored. Default is True.
    """
    # Write the graph stats in the file if required
    if write_stats:
        get_stats(G, output_path)

    # Undirected graphs are expanded to both edge directions when requested;
    # directed graphs are written as they are.
    if not G.is_directed() and write_dir:
        H = nx.to_directed(G)
    else:
        H = G

    # The handle used to be opened and never closed; the context manager
    # guarantees the data is flushed and the descriptor released.
    with open(output_path, 'a+b') as f:
        if write_weights:
            # Rebuild as a DiGraph so that every edge carries a weight
            # (default 1) and each listed direction is written once.
            J = nx.DiGraph()
            J.add_weighted_edges_from(H.edges.data('weight', 1))
            nx.write_weighted_edgelist(J, f, delimiter=delimiter)
        else:
            nx.write_edgelist(H, f, delimiter=delimiter, data=False)
def main(_):
    """Build an item-item similarity graph from text embeddings and store it.

    Unless FLAGS.text_embeddings points to precomputed embeddings, item texts
    are built for the item type in FLAGS.item ("keen", "ml-1m" or "amazon"),
    embedded with build_item_embeds and exported. A graph is then built from
    the embeddings, optionally plotted, and written next to the dataset as a
    weighted edge list together with a pickle of neighbour distances.

    Raises ValueError for an unrecognized FLAGS.item.
    """
    dataset_path = Path(FLAGS.dataset_path)
    # for amazon the item name is derived from the reviews file name
    item_name = FLAGS.amazon_reviews.split("5")[0][:-1] if FLAGS.item == "amazon" else FLAGS.item
    if not FLAGS.text_embeddings:
        text_file_path = dataset_path / FLAGS.file
        if FLAGS.item == "keen":
            data = load_jsonl(text_file_path)
            all_keens = keen.get_keens(data)
            # keeps keens with at least one gem
            keens = {k: v for k, v in all_keens.items() if v.gems}
            print(f"Total amount of keens: {len(all_keens)}")
            print(f"Keen with at least one gem: {len(keens)}")
            # NOTE(review): this repeats the enclosing condition, so the else
            # branch (texts from gems) cannot run as written -- confirm.
            if FLAGS.item == "keen":
                texts = keen.build_texts_from_keens(keens)
            else:
                texts = keen.build_texts_from_gems(keens)
        elif FLAGS.item == "ml-1m":
            texts = movielens.build_texts_from_movies(text_file_path)
        elif FLAGS.item == "amazon":
            texts = amazon.build_text_from_items(dataset_path, FLAGS.amazon_reviews, FLAGS.amazon_meta)
        else:
            raise ValueError(f"Unrecognized item: {FLAGS.item}")
        print(f"Items with text from {item_name} to encode with USE: {len(texts)}")
        print(list(texts.items())[:3])
        weight_first_embed = FLAGS.item == "keen" or "amazon" in FLAGS.dataset_path
        embeds = build_item_embeds(texts, FLAGS.use_model_url, weight_first_embedding=weight_first_embed)
        export_text_embeddings(embeds, dataset_path, item_name)
    else:
        embeds = load_text_embeddings(FLAGS.text_embeddings)
    if FLAGS.debug:
        # debug runs keep only the first 50 embeddings
        embeds = {k: v for k, v in list(embeds.items())[:50]}
    if len(embeds) < FLAGS.max_embedding_len:
        item_ids, cossim_matrix = build_cossim_matrix(embeds)
        graph = build_graph(item_ids, cossim_matrix, FLAGS.threshold, FLAGS.use_distance)
    else:
        # if there are N embeddings, the cossim_matrix is N^2, which might not fit in memory for large values of N.
        # In this case, we compute the cosine similarity for each pair of embeddings, but this is deadly slow
        graph = build_graph_from_embeds(embeds, FLAGS.threshold, FLAGS.use_distance)
    print(f"Graph info:\n{nx.info(graph)}")
    if FLAGS.plot:
        plot_graph(graph, dataset_path / f'{FLAGS.item}_{FLAGS.item}_graph_th{FLAGS.threshold}.png')
    neighs_and_dists = get_neighbors_with_distances(graph)
    result = {"item_item_distances": neighs_and_dists}
    # stores graph
    graph_file_name = f'{item_name}_th{FLAGS.threshold}_graph.edgelist'
    nx.write_weighted_edgelist(graph, str(dataset_path / graph_file_name))
    # stores distances for preprocessing
    file_name = f'{item_name}_th{FLAGS.threshold}_{"cos" if FLAGS.use_distance else "hop"}distances.pickle'
    save_as_pickle(dataset_path / file_name, result)
def print_influence_zone(G, node_wts, fc, noi, downstream, upstream):
    """Write the influence zone of the node of interest ``noi`` to disk.

    The influence zone is the subgraph of ``G`` induced by the union of
    ``downstream`` and ``upstream``. Its weighted edge list goes to
    '<noi>_inf_zone.txt'. A tab-separated table of node, weight (from
    ``node_wts``) and fold change (from ``fc``) goes to
    '<noi>_inf_zone_nodes.txt'.
    """
    nodes = set(downstream).union(set(upstream))
    inf_zone = G.subgraph(nodes)
    nx.write_weighted_edgelist(inf_zone, noi+'_inf_zone.txt')

    # Text mode: the rows are str, which a 'wb' handle rejects on Python 3.
    with open(noi+'_inf_zone_nodes.txt', 'w') as f:
        f.write( "\t".join(["node", "weight", "fold_change"]) + "\n" )
        for node in nodes:
            # str(): join() raises TypeError on non-string fields
            fields = [str(node), str(node_wts[node]), str(fc[node])]
            f.write( "\t".join(fields) + "\n" )
def data():
    """Dump the final state of the run to disk.

    Writes the global graphs ``g`` and ``o`` as weighted edge lists and the
    value of ``global_uo(o)`` as text, all named after the run id and
    ``file_num``.
    """
    global time, o, g, file_num

    g_path = 'run_%s_g_edgelist_end_%d.csv' % (args.runid, file_num)
    nx.write_weighted_edgelist(g, g_path)
    o_path = 'run_%s_o_edgelist_end_%d.csv' % (args.runid, file_num)
    nx.write_weighted_edgelist(o, o_path)

    with open('run_%s_gu_%d.txt' % (args.runid, file_num), 'w') as GU:
        GU.write(str(global_uo(o)))
def save_graph(graph, name):
    """Cache ``graph`` on disk under ``name`` unless a file already exists.

    Does nothing when ``graph`` is None or graph files are disabled through
    USE_GRAPH_FILE. An existing file is never overwritten.
    """
    if graph is not None and USE_GRAPH_FILE:
        target = get_graph_path(name)
        already_saved = os.path.exists(target)
        if not already_saved:
            nx.write_weighted_edgelist(graph, target, encoding="utf-8")
def save(self):
    """
    Persist the word graph to 'wordgraph_edgelist.txt' as a weighted edge list.

    The data structure could get quite large, so caching it to disk is a good
    idea.

    ** note ** replace with Redis in production -- the Redis dependency was
    removed for the Open Source release to decrease complexity.
    """
    l.info("<<<<<<< SAVING WORD-GRAPH >>>>>>>")
    edgelist_path = "wordgraph_edgelist.txt"
    net.write_weighted_edgelist(self.word_graph, edgelist_path)
def buildGraph(cosin_similarities, data_set_name, EdgeLists_folder):
    """Build and save the graph of strongly similar pairs.

    ``cosin_similarities`` is turned into a graph. Only edges between two
    different nodes (taken once, with u < v) whose weight is above 0.95 are
    kept. The result is written to '<EdgeLists_folder>/<data_set_name>.file'
    as a weighted edge list and returned.
    """
    full_graph = nx.from_numpy_array(cosin_similarities)

    # u < v already excludes self-loops and keeps each pair once
    strong_edges = []
    for u, v, attrs in full_graph.edges(data=True):
        if u < v and attrs['weight'] > 0.95:
            strong_edges.append((u, v, attrs))

    G_filter = nx.Graph(strong_edges)
    out_base = os.path.join(EdgeLists_folder, data_set_name)
    nx.write_weighted_edgelist(G_filter, out_base + ".file")
    return G_filter
def save(self):
    """
    Persist the word graph to 'wordgraph_edgelist.txt' as a weighted edge list.

    The data structure could get quite large, so caching it to disk is a good
    idea.

    ** note ** replace with Redis in production -- the Redis dependency was
    removed for the Open Source release to decrease complexity.
    """
    l.info("<<<<<<< SAVING WORD-GRAPH >>>>>>>")
    edgelist_path = "wordgraph_edgelist.txt"
    net.write_weighted_edgelist(self.word_graph, edgelist_path)
def syllable_net(corpus, syllable_list, d="directed", w="weighted"):
    """Creates syllable network from co-occurrence of syllables within words.

    Parameters
    ----------
    corpus : file
        original text file from which the network will be created; every line
        is split on tabs and its second field is used as the word
    syllable_list : file
        whitespace-separated lines whose first field is a word and whose
        second field is that word split into syllables with '-'
    d : directed or undirected
        type of graph
    w : weighted or unweighted
        if weighted is selected than the weight of the link between two
        syllables will be proportional to the overall frequencies of the
        corresponding syllables co-occurrence within words from a text

    Returns
    -------
    The created graph. The edge list is also written next to `corpus` as
    '<corpus without extension>_syllable.edges'.

    Raises
    ------
    ValueError
        If `d` is neither "directed" nor "undirected".
    """
    # Fail fast: an unknown `d` used to leave `g` unbound and only surfaced
    # later as a NameError.
    if d == "directed":
        g = nx.DiGraph()
    elif d == "undirected":
        g = nx.Graph()
    else:
        raise ValueError("d must be 'directed' or 'undirected', got %r" % (d,))

    with open(corpus, "r", encoding="utf-8") as f:
        words = [line.split("\t") for line in f]

    with open(syllable_list, "r", encoding="utf-8") as f:
        syllables = [line.split() for line in f]

    syllable_edges = dict()

    for i in words:
        if len(i) > 1:
            for j in syllables:
                if i[1] == j[0]:
                    parts = j[1].split("-")
                    # consecutive syllables of the word form an edge
                    for l, r in zip(parts[:-1], parts[1:]):
                        edge = (l, r)
                        syllable_edges[edge] = syllable_edges.get(edge, 0) + 1
                    # NOTE(review): the original had a dangling `else:` here
                    # whose nesting was lost with the formatting. It is read
                    # as the `else` of the loop above, which never breaks, so
                    # the word is added as a node for every match. Confirm
                    # that this was the intended nesting.
                    g.add_node(i[1])

    edge_list = [(k[0], k[1], v) for (k, v) in syllable_edges.items()]

    if w == "unweighted":
        g.add_edges_from(edge_list)
        nx.write_edgelist(g, corpus.rsplit(".", 1)[0] + "_syllable.edges")
    elif w == "weighted":
        g.add_weighted_edges_from(edge_list)
        nx.write_weighted_edgelist(g, corpus.rsplit(".", 1)[0] + "_syllable.edges")

    return g
def findTEdges(G2005, G2006):
    """Write the edges that exist in ``G2006`` but not in ``G2005``.

    The new edges are collected in an undirected graph and written to
    'T.edgelist' with '*' as the delimiter.
    """
    new_edges = nx.Graph()
    # keep only edges of the later graph that are absent from the earlier one
    new_edges.add_edges_from(
        edge for edge in G2006.edges() if not G2005.has_edge(*edge))

    with open('T.edgelist', 'wb+') as fp:
        nx.write_weighted_edgelist(new_edges, fp, delimiter='*')
def write_edgelist(path_to_file):
    """Convert '<path_to_file>.gexf' into a weight-1 integer edge list.

    Self-loops are removed. Every remaining edge loses its attributes and
    gets weight 1. Nodes are relabelled to consecutive integers and the
    result is written to '<path_to_file>.txt' as a weighted edge list.
    """
    graphe = nx.read_gexf(path_to_file + ".gexf")

    # Iterate over a snapshot: removing self-loops while walking the live
    # edge view changes the underlying dict during iteration.
    for source, target in list(graphe.edges()):
        if source == target:
            graphe.remove_edge(source, target)
        else:
            graphe[source][target].clear()
            graphe[source][target]['weight'] = 1

    nx.write_weighted_edgelist(nx.convert_node_labels_to_integers(graphe), path_to_file + ".txt")
def target_edges(G1, G2):
    """Write the edges that are in ``G2`` but not in ``G1`` and print their count.

    The edges are collected in an undirected graph and written to
    'target_graph.txt' with ',' as the delimiter.
    """
    target_graph = nx.Graph()
    # every edge of the second graph that the first graph lacks
    missing = [edge for edge in G2.edges() if not G1.has_edge(*edge)]
    for edge in missing:
        target_graph.add_edge(*edge)

    with open("target_graph.txt", "wb+") as out_file:
        nx.write_weighted_edgelist(target_graph, out_file, delimiter=',')

    print(len(target_graph.edges))
def main():
    """Re-index the nodes of the bundled data sets to consecutive integers.

    Each '../graph/<name>.tsv' weighted edge list is read, its node labels
    are converted to integers, and the result is written tab-separated to
    '../reindexed_graphs/<name>.tsv'.
    """
    data_set_names = [
        'airport', 'authors', 'collaboration', 'facebook', 'congress', 'forum'
    ]
    for data_set_name in data_set_names:
        source_path = '../graph/' + data_set_name + '.tsv'
        reindexed = networkx.convert_node_labels_to_integers(
            networkx.read_weighted_edgelist(source_path))
        target_path = '../reindexed_graphs/' + data_set_name + '.tsv'
        networkx.write_weighted_edgelist(reindexed, target_path, delimiter='\t')
def write_networks(self):
    """
    Writes every network in ``self.networks`` to a weighted edge list.

    Each network is stored as '<fp>/<network name>.txt', where ``fp`` is
    taken from ``self.inputs``. Any failure is logged with its traceback and
    not re-raised.

    :return: None
    """
    try:
        for name in self.networks:
            target = self.inputs['fp'] + '/' + name + '.txt'
            graph = self.networks[name]
            nx.write_weighted_edgelist(G=graph, path=target)
    except Exception:
        logger.error("Unable to write networks to disk. ", exc_info=True)
def save_graph(self, OUT_PATH):
    """
    Save the graph in OUT_PATH.

    When ``self.graphtool`` is falsy the graph is written as
    '<OUT_PATH>.weighted.edgelist'. Otherwise each cost class and the weight
    are registered as edge properties and the graph is saved as
    '<OUT_PATH>.xml.gz'.
    """
    if not self.graphtool:
        nx.write_weighted_edgelist(self.graph, OUT_PATH + '.weighted.edgelist')
        return

    # attach every cost layer under its class name before saving
    for position, cost_class in enumerate(self.cost_classes):
        self.graph.edge_properties[cost_class] = self.cost_props[position]
    self.graph.edge_properties["weight"] = self.weight
    self.graph.save(OUT_PATH + ".xml.gz")
def update_wrapper(infoname, priorname, outname, outavgname):
    """Run ``update`` and store the resulting graph and its mean-weight version.

    For every edge the 'params' attribute and the mean of the gamma
    distribution it defines (shape = params[0], scale = params[1]) are
    printed. The updated graph is written to ``outname`` as a plain edge
    list. A copy whose 'weight' is that gamma mean is written to
    ``outavgname`` as a comma-separated weighted edge list.
    """
    def _gamma_mean(params):
        # mean of the gamma distribution described by (shape, scale)
        return stats.gamma(params[0], scale=params[1]).stats(moments='m')

    G = update(infoname, priorname)

    for e in G.edges():
        edge_params = G[e[0]][e[1]]['params']
        print(edge_params)
        # same text as the old two-item print statement
        print("%s %s" % (e, _gamma_mean(edge_params)))

    nx.write_edgelist(G,outname)

    A = G.copy()
    for e in A.edges():
        A[e[0]][e[1]]['weight'] = _gamma_mean(G[e[0]][e[1]]['params'])
    nx.write_weighted_edgelist(A,outavgname,delimiter=',')
def original_generate_token_graph():
    """Build a token co-occurrence graph from the documents in dataset_folder.

    Every .pdf and .docx file in the direct sub-folders of ``dataset_folder``
    is converted to text, lower-cased and tokenised, and the tokens of all
    documents are concatenated. Co-occurrence counts between a token and the
    tokens just before it are accumulated. The graph is written to 'graph.g'
    as a weighted edge list and its adjacency matrix to 'graph.adj'.
    """
    corp = []
    input_folders = [
        sub_dir for sub_dir in listdir(dataset_folder)
        if isdir(join(dataset_folder, sub_dir))
    ]
    for folder in input_folders:
        dir_path = dataset_folder + os.sep + folder + os.sep
        files = [f for f in listdir(dir_path) if isfile(join(dir_path, f))]
        for fname in files:
            file_path = dir_path + fname
            _, file_extension = splitext(file_path)
            if file_extension == ".pdf":
                doc = convert_pdf_to_txt(file_path)
            elif file_extension == ".docx":
                doc = convert_docx_to_txt(file_path)
            else:
                continue
            if doc != "":
                doc = doc.decode("utf8")
                doc = doc.lower()
                doc = doc_to_wordlist(doc, True)
                # Extend one flat list instead of wrapping a new it.chain
                # around the previous one for every document.
                corp.extend(doc)

    graph = nx.Graph()
    weights = Counter()
    edges = set()

    # the first five tokens count towards each other in both directions
    window = corp[0:5]
    for tup in it.permutations(window, 2):
        weights[tup] += 1

    for i in range(3, len(corp) - 2):
        for j in range(i - 2, i + 2):
            weights[(corp[j], corp[i + 2])] += 1
            weights[(corp[i + 2], corp[j])] += 1
            edges.add((corp[i + 2], corp[j]))

    for e in edges:
        # keyword form: the positional attribute dict is only accepted by
        # old NetworkX releases
        graph.add_edge(e[0], e[1], weight=weights[e])

    print(graph)
    nx.write_weighted_edgelist(graph, "graph.g")
    adjacency = nx.to_numpy_matrix(graph)  # computed once, printed and saved
    print(adjacency)
    np.savetxt("graph.adj", adjacency)
    print("finished")
def ego_word_subnet(word_network, word, radius=1, d="directed", w="weighted", neighborhood="all"):
    """Creates word-ego network which is a subnetwork of neighbours centered at one specified node (word) within a given radius.

    Parameters
    ----------
    word_network : edge list
        of original network
    word : string
        subnetwork will be created of neighbours centered at specified word
    radius : int
        radius from which subnetwork will be created
    d : directed or undirected
        type of graph
    w : weighted or unweighted
        if weighted is selected than the weight of the link between two words
        will be proportional to the overall frequencies of the corresponding
        words co-occurrence within a original network
    neighborhood : successors, predecessors or all
        only used for directed graphs

    Returns
    -------
    The ego subnetwork. It is also written next to `word_network` as
    '<word_network without extension>_ego_subnetwork.edges'.

    Raises
    ------
    ValueError
        If `d` is unknown, or `neighborhood` is unknown for a directed graph.
    """
    # Validate first: an unknown value used to leave `sg` unbound and only
    # surfaced later as a NameError.
    if d not in ("directed", "undirected"):
        raise ValueError("d must be 'directed' or 'undirected', got %r" % (d,))
    if d == "directed" and neighborhood not in ("successors", "predecessors", "all"):
        raise ValueError(
            "neighborhood must be 'successors', 'predecessors' or 'all', got %r"
            % (neighborhood,))

    if d == "directed":
        word_net = nx.read_weighted_edgelist(word_network,
                                             create_using=nx.DiGraph())
        if neighborhood == "successors":
            sg = nx.ego_graph(word_net, word, radius)
        elif neighborhood == "predecessors":
            # predecessors are the successors in the reversed graph
            sg = nx.ego_graph(word_net.reverse(), word, radius)
        else:
            sg = nx.ego_graph(word_net, word, radius, undirected=True)
    else:
        word_net = nx.read_weighted_edgelist(word_network)
        sg = nx.ego_graph(word_net, word, radius)

    if w == "unweighted":
        nx.write_edgelist(
            sg, word_network.rsplit(".", 1)[0] + "_ego_subnetwork.edges")
    elif w == "weighted":
        nx.write_weighted_edgelist(
            sg, word_network.rsplit(".", 1)[0] + "_ego_subnetwork.edges")

    return sg
def main(n_start, n_count=1, n_inc=1, c_in_start=10, c_in_count=1, c_in_inc=1,
         c_out_start=5, c_out_count=1, c_out_inc=1, comm_count=2, DC=False,
         i=0):
    """Sweep over network sizes and degrees, running belief propagation on each.

    For every (n, c_in, c_out) combination produced by ``custom_range`` a
    network is generated with ``make_net``, perturbed with ``fuzz_network``,
    written to ``data/`` and handed to the external ``src/bp`` program.  One
    line per run (parameters, ``evaluate`` score, wall-clock time of the
    external call, timestamp, run index ``i``) is appended to
    ``out/results.txt``.

    ``DC`` is accepted but not used by this function.
    """
    bp_uncertain = 'src/bp'
    edge_frac = 1.
    nonedge_mult = 5.
    b = 2
    trials = 2

    for directory in ('out', 'data'):
        os.makedirs(directory, exist_ok=True)

    for n in custom_range(n_start, n_count, n_inc):
        for c_in in custom_range(c_in_start, c_in_count, c_in_inc):
            for c_out in custom_range(c_out_start, c_out_count, c_out_inc):
                # The same parameter tuple tags all three file names.
                tag = (n, c_in, c_out, b, edge_frac, nonedge_mult, i)
                original_net = (
                    'data/original_net-%d-%f-%f-%f-%f-%f-%d.edges' % tag)
                uncertain_net = (
                    'data/noisy_net-%d-%f-%f-%f-%f-%f-%d.edges' % tag)
                uncertain_comms = (
                    'out/uncertain_comms-%d-%f-%f-%f-%f-%f-%d.out' % tag)

                print("making and fuzzing network")
                G_orig = make_net(c_in, c_out, n)
                write_edgelist(G_orig, original_net)
                G, _ = fuzz_network(G_orig, 1, b, edge_frac, nonedge_mult)
                write_weighted_edgelist(G, uncertain_net)

                start1 = time()
                print("running belief propagation")
                command = '%s -i %s -o %s -c %d -l %d -n %d' % (
                    bp_uncertain, uncertain_net, uncertain_comms,
                    comm_count, 3, trials)
                os.system(command)
                end1 = time()

                with open('out/results.txt', 'a+') as out_file:
                    out_file.write(
                        "%d %f %f\t%f %f %f\t %f %f\t %s %d\n" % (
                            n, c_in, c_out,
                            b, edge_frac, nonedge_mult,
                            evaluate(uncertain_comms, n), end1 - start1,
                            str(datetime.now()), i))
def write_edgelist(path_to_file):
    """Convert ``<path_to_file>.gexf`` into a unit-weight edge list.

    The GEXF graph is read, multigraphs are collapsed to simple graphs
    (a notice is printed when that happens), every edge's attributes are
    replaced by a single ``weight`` of 1, nodes are relabelled to
    consecutive integers and the result is written to
    ``<path_to_file>.txt``.

    Parameters
    ----------
    path_to_file : str
        Path of the input file without its ``.gexf`` extension.
    """
    graphe = nx.read_gexf(path_to_file + ".gexf")

    # MultiDiGraph must be tested first: it is also a MultiGraph subclass.
    if isinstance(graphe, nx.MultiDiGraph):
        print("has_multiple_edges")
        graphe = nx.DiGraph(graphe)
    elif isinstance(graphe, nx.MultiGraph):
        print("has_multiple_edges")
        graphe = nx.Graph(graphe)

    for source, target in graphe.edges():
        attributes = graphe[source][target]
        attributes.clear()
        attributes["weight"] = 1

    nx.write_weighted_edgelist(
        nx.convert_node_labels_to_integers(graphe), path_to_file + ".txt")
def KCored(G):
    """Write the k-core of ``G`` where k is the (upper) median node degree.

    The edge list of the k-core is written to ``outDirK + clusterFile``;
    both names are read from module scope.

    Parameters
    ----------
    G : networkx graph
        Graph to reduce.  An empty graph raises ``IndexError``.
    """
    degrees = sorted(G.degree(node) for node in G.nodes())
    # Floor division keeps the index an int on Python 3 as well.
    k = degrees[len(degrees) // 2]

    subG = nx.k_core(G, k=k)
    # The third positional parameter of write_weighted_edgelist is
    # ``comments``, not a file mode, so the former 'w' argument is dropped.
    nx.write_weighted_edgelist(subG, outDirK + clusterFile)
def init_full():
    """Initialise the global graphs ``g`` and ``o`` and dump both to CSV.

    All edge weights of ``g`` are reset to 0.  ``o`` becomes a copy of ``g``
    in which each edge independently receives, with probability 0.1, a
    weight drawn from ``{1, -1}``.  Both graphs are written as weighted edge
    lists whose file names contain ``args.runid`` and ``file_num``.
    """
    global g, o, file_num
    randomize_states(g)

    # Subscript access works on NetworkX 1.x and 2.x; the ``Graph.edge``
    # attribute used previously was removed in 2.0.
    for i, j in g.edges():
        g[i][j]['weight'] = 0

    o = g.copy()
    for i, j in o.edges():
        if rd.random() < 0.1:
            o[i][j]['weight'] = rd.choice([1, -1])

    nx.write_weighted_edgelist(
        g, 'run_%s_g_edgelist_%d.csv' % (args.runid, file_num))
    nx.write_weighted_edgelist(
        o, 'run_%s_o_edgelist_%d.csv' % (args.runid, file_num))
def init_full():
    """Initialise the global graphs ``g`` and ``o`` and dump both to CSV.

    All edge weights of ``g`` are reset to 0.  ``o`` becomes a copy of ``g``
    in which each edge independently receives, with probability 0.07, a
    weight drawn from ``{1, -1}``.  The graphs are written to
    ``g_edgelist.csv`` and ``o_edgelist.csv``.
    """
    global g, o
    randomize_states(g)

    # Subscript access works on NetworkX 1.x and 2.x; the ``Graph.edge``
    # attribute used previously was removed in 2.0.
    for i, j in g.edges():
        g[i][j]['weight'] = 0

    o = g.copy()
    for i, j in o.edges():
        if rd.random() < 0.07:
            o[i][j]['weight'] = rd.choice([1, -1])

    nx.write_weighted_edgelist(g, 'g_edgelist.csv')
    nx.write_weighted_edgelist(o, 'o_edgelist.csv')
def main():
    """Update edge parameters and export the graph plus its mean weights.

    ``update`` is called on the inferred and prior data files.  For every
    edge the ``params`` attribute and the mean of the gamma distribution it
    describes (shape ``params[0]``, rate ``params[1]``) are printed.  The
    graph is written to ``updated.txt``; a copy whose ``weight`` attribute
    holds that mean is written to ``updated_avg.txt`` as a comma-separated
    weighted edge list.
    """
    infoname = 'sim_data_inferred.txt'
    priorname = 'sim_data_prior.txt'
    outname = 'updated.txt'
    outavgname = 'updated_avg.txt'

    G = update(infoname, priorname)

    def gamma_mean(u, v):
        # Mean of Gamma(shape, scale=1/rate) taken from the edge parameters.
        params = G[u][v]['params']
        return stats.gamma(params[0], scale=1. / params[1]).stats(moments='m')

    for e in G.edges():
        print(G[e[0]][e[1]]['params'])
        # Single formatted string prints identically on Python 2 and 3.
        print("%s %s" % (e, gamma_mean(e[0], e[1])))
    nx.write_edgelist(G, outname)

    A = G.copy()
    for e in A.edges():
        # NOTE(review): the mean may be NaN for degenerate parameters; it is
        # stored unchanged, as before.
        A[e[0]][e[1]]['weight'] = gamma_mean(e[0], e[1])
    nx.write_weighted_edgelist(A, outavgname, delimiter=',')
def data():
    """Write the final state of the global graphs and their summary values.

    Reads the module-level ``g``, ``o``, ``args`` and ``file_num``.  Writes
    both graphs as weighted edge lists, the value of ``global_uo(o)`` to
    ``run_<id>_gu_<n>.txt`` and the list of ``local_uo`` values for every
    node of ``o`` to ``run_<id>_UO_<n>.txt``.  The sum of the local values
    is printed.
    """
    nx.write_weighted_edgelist(
        g, 'run_%s_g_edgelist_end_%d.csv' % (args.runid, file_num))
    nx.write_weighted_edgelist(
        o, 'run_%s_o_edgelist_end_%d.csv' % (args.runid, file_num))

    # ``with`` guarantees the files are closed even if a helper raises.
    with open('run_%s_gu_%d.txt' % (args.runid, file_num), 'w') as gu_file:
        gu_file.write(str(global_uo(o)))

    with open('run_%s_UO_%d.txt' % (args.runid, file_num), 'w') as lu_file:
        UO = [local_uo(i, o) for i in o.nodes()]
        lo_sum = sum(UO)
        lu_file.write(str(UO))

    print(lo_sum)
def grapheme_net(syllable_network, d="directed", w="weighted"):
    """Creates grapheme network.

    The structure of grapheme network depends on a existing network of
    syllables. Two graphemes are linked if they co-occur as neighbours
    within a syllable.  The result is also written to
    ``<syllable_network without extension>_grapheme.edges``.

    Parameters
    ----------
    syllable_network : edge list of a syllable network
    d : directed or undirected
        type of graph
    w : weighted or unweighted
        if weighted is selected, the weight of a link is the number of
        syllable nodes in which the two graphemes appear as neighbours;
        any other value than these two adds no edges and writes no file

    Returns
    -------
    The grapheme graph (``nx.DiGraph`` or ``nx.Graph``).

    Raises
    ------
    ValueError
        If ``d`` is neither "directed" nor "undirected".
    """
    if d == "directed":
        syllable_net = nx.read_weighted_edgelist(syllable_network,
                                                 create_using=nx.DiGraph())
        g = nx.DiGraph()
    elif d == "undirected":
        syllable_net = nx.read_weighted_edgelist(syllable_network)
        g = nx.Graph()
    else:
        raise ValueError(
            "d must be 'directed' or 'undirected', got %r" % (d,))

    for node in syllable_net.nodes():
        graphemes = list(node)
        # Walk over consecutive grapheme pairs of the syllable.
        for previous, current in zip(graphemes, graphemes[1:]):
            if w == "weighted":
                if g.has_edge(previous, current):
                    g[previous][current]['weight'] += 1
                else:
                    g.add_edge(previous, current, weight=1)
            elif w == "unweighted":
                g.add_edge(previous, current)

    out_path = syllable_network.rsplit(".", 1)[0] + "_grapheme.edges"
    if w == "unweighted":
        nx.write_edgelist(g, out_path)
    elif w == "weighted":
        nx.write_weighted_edgelist(g, out_path)

    return g
def save_graph(self, graphname):
    """
    Saves the graph to disk

    For the 'dwi' modality the edge count is stored in the graph attributes
    and the graph is written as a comma-delimited weighted edge list; for
    the 'func' modality ``self.g`` is written with ``numpy.savetxt`` using
    the node ids as header.  Any other modality raises ``ValueError``.

    **Positional Arguments:**

            graphname:
                - Filename for the graph
    """
    import numpy as np
    import networkx as nx

    modality = self.modal
    if modality not in ('dwi', 'func'):
        raise ValueError("Unsupported Modality.")

    if modality == 'dwi':
        self.g.graph['ecount'] = nx.number_of_edges(self.g)
        nx.write_weighted_edgelist(self.g, graphname, delimiter=",")
        return

    column_header = ','.join(str(n) for n in self.n_ids)
    np.savetxt(graphname, self.g, comments='', delimiter=',',
               header=column_header)
def ego_word_subnet(word_network, word, radius=1, d="directed",
                    w="weighted", neighborhood="all"):
    """Creates word-ego network which is a subnetwork of neighbours
    centered at one specified node (word) within a given radius.

    The result is also written next to the input file as
    ``<word_network without extension>_ego_subnetwork.edges``.

    Parameters
    ----------
    word_network : edge list of original network
    word : string
        subnetwork will be created of neighbours centered at specified word
    radius : int
        radius from which subnetwork will be created
    d : directed or undirected
        type of graph
    w : weighted or unweighted
        selects whether the output file is written with or without weights;
        any other value writes no file
    neighborhood : successors, predecessors or all
        only used when ``d`` is "directed"

    Returns
    -------
    The ego graph produced by ``nx.ego_graph``.

    Raises
    ------
    ValueError
        If ``d`` or (for directed graphs) ``neighborhood`` is not one of the
        supported values.
    """
    if d not in ("directed", "undirected"):
        raise ValueError(
            "d must be 'directed' or 'undirected', got %r" % (d,))
    if d == "directed" and neighborhood not in ("successors",
                                                "predecessors", "all"):
        raise ValueError(
            "neighborhood must be 'successors', 'predecessors' or 'all', "
            "got %r" % (neighborhood,))

    if d == "directed":
        word_net = nx.read_weighted_edgelist(word_network,
                                             create_using=nx.DiGraph())
        if neighborhood == "successors":
            sg = nx.ego_graph(word_net, word, radius)
        elif neighborhood == "predecessors":
            # NOTE(review): the ego graph is taken from the reversed network,
            # so the returned edges point in the reversed direction.
            sg = nx.ego_graph(word_net.reverse(), word, radius)
        else:
            sg = nx.ego_graph(word_net, word, radius, undirected=True)
    else:
        word_net = nx.read_weighted_edgelist(word_network)
        sg = nx.ego_graph(word_net, word, radius)

    out_path = word_network.rsplit(".", 1)[0] + "_ego_subnetwork.edges"
    if w == "unweighted":
        nx.write_edgelist(sg, out_path)
    elif w == "weighted":
        nx.write_weighted_edgelist(sg, out_path)

    return sg
def main():
    """Command-line entry point: filter a similarity graph and save a subgraph.

    Reads a comma-delimited weighted edge list (comments removed by
    ``skip_comments``, light edges dropped by ``enforce_min``), passes the
    graph to ``analyse`` and writes the subgraph induced by the returned
    nodes to the output file.  When no output file is given, the name is
    derived from the input by replacing ``.prob`` with ``.analysis.tmp``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('edgelist', nargs='?',
                        default='analysis/combined_716eef6.prob')
    parser.add_argument('outfile', nargs='?')
    # type=float: without it, values given on the command line stay strings
    # while the defaults are floats.
    parser.add_argument('-t', '--interconnectivity', type=float, default=0.80)
    parser.add_argument('-d', '--density', type=float, default=0.80)
    parser.add_argument('-m', '--min-edge', type=float, default=0.20)
    args = parser.parse_args()

    if args.outfile is None:
        args.outfile = args.edgelist.replace('.prob', '') + '.analysis.tmp'

    threshold_min_weight = args.min_edge
    threshold_interconnectivity = args.interconnectivity

    print_err("Loading graph")
    # The graph is fully built inside the ``with`` block, so the file can be
    # closed as soon as reading finishes.
    with open(args.edgelist, 'rb') as edge_file:
        G_sim = nx.read_weighted_edgelist(
            enforce_min(skip_comments(edge_file), threshold_min_weight),
            nodetype=int, delimiter=',')
    print_err('Loaded (V={:}, E={:})'.format(len(G_sim), G_sim.size()))

    cc = analyse(G_sim, threshold_interconnectivity)
    nx.write_weighted_edgelist(G_sim.subgraph(cc), args.outfile)
def wordlist_subnet(word_network, word, words_file, d="directed",
                    w="weighted"):
    """Creates word-list network which is a simple subnetwork based on
    provided list of words.

    The result is also written to
    ``<word_network without extension>_wordlist_subnetwork.edges``.

    Parameters
    ----------
    word_network : edge list of the original network
    word : string
        added to the word list when the file does not already contain it
    words_file : file
        file containing words (one per line) that will be extracted from
        the original network
    d : directed or undirected
        type of graph
    w : weighted or unweighted
        selects whether the output file is written with or without weights;
        any other value writes no file

    Returns
    -------
    A new graph holding the subgraph induced by the word list.

    Raises
    ------
    ValueError
        If ``d`` is neither "directed" nor "undirected".
    """
    if d not in ("directed", "undirected"):
        raise ValueError(
            "d must be 'directed' or 'undirected', got %r" % (d,))

    with open(words_file, "r", encoding="utf-8") as f:
        word_list = f.read().splitlines()
    if word not in word_list:
        word_list.append(word)

    if d == "directed":
        word_net = nx.read_weighted_edgelist(word_network,
                                             create_using=nx.DiGraph())
        sg = nx.DiGraph(word_net.subgraph(word_list))
    else:
        word_net = nx.read_weighted_edgelist(word_network)
        sg = nx.Graph(word_net.subgraph(word_list))

    out_path = word_network.rsplit(".", 1)[0] + "_wordlist_subnetwork.edges"
    if w == "unweighted":
        nx.write_edgelist(sg, out_path)
    elif w == "weighted":
        nx.write_weighted_edgelist(sg, out_path)

    return sg
def getnx(self, ts, save=False):
    '''
    Create a networkx graph from a DSMACC new class

    ``self.create_posjac()`` is called first; the row of ``self.posjac`` at
    ``ts`` then supplies one edge per column.  Column labels have the form
    ``source->target`` and the cell value becomes the edge weight.
    Self-loops are removed (their nodes stay in the graph).

    ts   -- row label of ``self.posjac`` to use
    save -- falsy, or a file name stem; when given the graph is written to
            ``<save>.wedgelist``

    Usage:
        getnx(a,a.ts[-1], 'propane')
    '''
    self.create_posjac()
    G = nx.DiGraph()
    posjac = self.posjac.loc[ts, :]

    # Pair labels with values directly instead of indexing the Series with
    # integer positions, which pandas deprecates for label-indexed Series.
    for label, value in zip(posjac.index, posjac.values):
        parts = label.split('->')
        G.add_edge(parts[0], parts[1], weight=value)

    # Version-independent replacement for ``G.selfloop_edges()``, which is
    # no longer a Graph method in current NetworkX releases.
    G.remove_edges_from([(u, v) for u, v in G.edges() if u == v])

    if save:
        nx.write_weighted_edgelist(G, save + '.wedgelist')
    return G
def getnx(self, ts, save=False, ignore=None):
    '''
    Create a networkx graph from a DSMACC new class

    The row of ``self.posjac`` at ``ts`` supplies the edges (``posjac`` is
    created on demand).  Column labels have the form ``source->target``.
    Only strictly positive values produce an edge; its weight is
    ``1e-4 + (log10(value) - log10(min)) / log10(max - min)`` where min and
    max are taken over the non-zero values of the row.  Afterwards
    self-loops, nodes missing from ``self.spec.columns``, the nodes listed
    in ``ignore`` and isolated nodes are removed.

    ts     -- row label of ``self.posjac`` to use
    save   -- falsy, or a file name stem; when given the graph is written
              to ``<save>.wedgelist``
    ignore -- optional collection of nodes to drop (default: none)

    Usage:
        getnx(a,a.ts[-1], 'propane')
    '''
    # ``None`` replaces the former mutable ``[]`` default.
    ignore = [] if ignore is None else ignore

    try:
        self.posjac
    except AttributeError:
        self.create_posjac()

    G = nx.DiGraph()
    posjac = self.posjac.loc[ts, :]

    nonzero = [i for i in posjac if i != 0]
    mn = np.min(nonzero)
    mx = np.log10(np.max(nonzero) - mn)
    mn = np.log10(mn)

    # Pair labels with values directly instead of indexing the Series with
    # integer positions, which pandas deprecates for label-indexed Series.
    for label, value in zip(posjac.index, posjac.values):
        if value > 0:
            parts = label.split('->')
            G.add_edge(parts[0], parts[1],
                       weight=1e-4 + (np.log10(value) - mn) / mx)

    # Version-independent replacement for ``G.selfloop_edges()``, which is
    # no longer a Graph method in current NetworkX releases.
    G.remove_edges_from([(u, v) for u, v in G.edges() if u == v])

    # Drop nodes that have no column in self.spec, then the caller's list.
    G = rm_nodes(G, set(G.nodes()) - set(self.spec.columns))
    G = rm_nodes(G, ignore)
    # rm isolates
    G = rm_nodes(G, list(nx.isolates(G)))

    if save:
        nx.write_weighted_edgelist(G, save + '.wedgelist')
    return G
def load_infopath(fname):
    """Load an infopath output file into a weighted directed graph.

    The file consists of node lines, one blank line, then edge lines; all
    lines are comma-separated.  The first field of a node line is the
    integer node id.  The first two fields of an edge line are the integer
    endpoints and the last field is the weight; edges whose weight is not
    positive are skipped.  The graph is also written as a weighted edge
    list to ``fname[:-4] + '_avg.txt'``.

    Parameters
    ----------
    fname : str
        Path of the infopath output file.

    Returns
    -------
    nx.DiGraph

    Raises
    ------
    ValueError
        If the file contains no blank separator line or a field cannot be
        parsed as a number.
    """
    import io

    # io.open translates all newline conventions by default; the former
    # 'rU' mode is rejected by Python 3.11+.  ``with`` closes the file.
    with io.open(fname, 'r') as f:
        lines = f.readlines()

    # Break lines into the node names and the edge attributes
    split = lines.index('\n')

    G = nx.DiGraph()
    G.add_nodes_from(int(l.split(',')[0]) for l in lines[:split])

    for line in lines[split + 1:]:
        if not line.strip():
            # Tolerate blank lines (e.g. at the end of the file).
            continue
        fields = line.split(',')
        weight = float(fields[-1])
        if weight > 0:
            G.add_edge(int(fields[0]), int(fields[1]), weight=weight)

    nx.write_weighted_edgelist(G, fname[:-4] + '_avg.txt')
    return G
def weight_preserving_configuration_model(G, filename=' '):
    """Return a randomised graph with G's degree sequence and edge weights.

    A configuration-model graph is generated from the degree sequence of
    ``G``, collapsed to a simple graph (parallel edges merged, self-loops
    removed) and the shuffled ``weight`` values of ``G`` are assigned to
    its edges.  Because of the collapsing step the result may have fewer
    edges than ``G``; surplus weights are then left unused.

    Parameters
    ----------
    G : networkx graph
        Source graph whose edges carry a ``weight`` attribute.
    filename : str
        When different from the single-space default, the randomised graph
        is also written there as a space-delimited weighted edge list.

    Returns
    -------
    nx.Graph

    Raises
    ------
    ValueError
        If ``G`` provides fewer weights than the randomised graph has edges.
    """
    import random as rn
    import time

    # Materialise as lists: dict views cannot be shuffled on Python 3.
    weight_sequence = list(nx.get_edge_attributes(G, 'weight').values())
    # dict() accepts both the dict (NetworkX 1.x) and the DegreeView (2.x).
    degree_sequence = list(dict(nx.degree(G)).values())

    rn.seed(rn.randint(0, 1000000) + time.time())

    E = nx.Graph(nx.configuration_model(degree_sequence))
    E.remove_edges_from([(u, v) for u, v in E.edges() if u == v])

    for _ in range(100):
        rn.shuffle(weight_sequence)

    edges = list(E.edges())
    if len(weight_sequence) < len(edges):
        raise ValueError(
            "G provides %d edge weights but the randomised graph has %d "
            "edges" % (len(weight_sequence), len(edges)))
    # zip pairs each edge with one weight in a single pass instead of
    # re-slicing the weight list for every edge.
    for (u, v), weight in zip(edges, weight_sequence):
        E[u][v]['weight'] = weight

    if filename != ' ':
        nx.write_weighted_edgelist(E, filename, delimiter=' ',
                                   encoding='utf-8')
        print('Randomized edgelist dumped to ' + filename)
    return E
def collect_comps(G, strongly, op, path):
    """Export information about the connected components of ``G``.

    ``strongly`` selects strongly ('S') or weakly ('W') connected
    components.  ``op`` selects the action:

    1 -- write a count view of the components via ``ex.collect_alt_views``
    2 -- write the graph returned by ``cull_comps(..., True)`` to ``path``
         and, gzip-named, to the parent directory
    3 -- write the graph returned by ``cull_comps(..., False)`` to ``path``

    Any other ``op`` does nothing.  Always returns ``None``.
    """
    find_components, ty = (
        (nx.strongly_connected_components, 'S') if strongly
        else (nx.weakly_connected_components, 'W'))
    cc_gen = find_components(G)

    if op == 1:
        view_file = path + "%sCCsXCountView.txt" % ty
        header = "Vertex from %sCC; Count of vertex in %sCC" % (ty, ty)
        ex.collect_alt_views(ex.gen_view(cc_gen), view_file, comments=header)
        return None

    if op == 2:
        # write raw trpl file of only vert in giant comp
        giantcc = cull_comps(G.copy(), cc_gen, True)
        fn = 'txTripletsCounts%sGiantOnly.txt' % ty
        print("Writing %s" % fn)
        nx.write_weighted_edgelist(giantcc, path + fn)
        nx.write_weighted_edgelist(giantcc, '../' + fn + '.gz')
        return None

    if op == 3:
        # write raw trpl file of only vert not in giant comp
        giantcc = cull_comps(G.copy(), cc_gen, False)
        fn = 'txTripletsCountsNo%sGiant.txt' % ty
        nx.write_weighted_edgelist(giantcc, path + fn)

    return None
def writeGraph(graph, filePath, weighted=True):
    """Write ``graph`` to ``filePath`` as an edge list.

    Parameters
    ----------
    graph : graph object accepted by the edge-list writers
    filePath : str
        Destination file; it is created or truncated.
    weighted : bool
        When true (default) ``write_weighted_edgelist`` is used, otherwise
        ``write_edgelist``.
    """
    writer = write_weighted_edgelist if weighted else write_edgelist
    # The NetworkX edge-list writers emit encoded bytes, so a file handle
    # passed to them must be opened in binary mode.
    # NOTE(review): assumes the writers are the NetworkX functions - confirm
    # against the module imports.
    with open(filePath, 'wb') as f:
        writer(graph, f)
def syntax_net(corpus, d="directed", w="weighted"):
    """Creates a syntax (dependency) network from a parsed corpus file.

    The corpus is a tab-separated file with one token per line and a blank
    line between sentences.  Column 0 is the token id, column 1 the word,
    column 4 the tag and column 6 the id of the parent token.  Tokens
    tagged "Z" (except "--", "-" and "%", which are renamed HYPHEN /
    PERCENT) are removed and their children re-attached.  Every remaining
    (parent word, word) pair becomes an edge whose count is the number of
    times the pair occurs.  The result is also written to
    ``<corpus without extension>_syntax.edges``.

    Parameters
    ----------
    corpus : file
        path of the parsed corpus
    d : directed or undirected
        type of graph
    w : weighted or unweighted
        if weighted is selected the pair counts become edge weights; any
        other value than these two adds no edges and writes no file

    Returns
    -------
    The syntax graph (``nx.DiGraph`` or ``nx.Graph``).

    Raises
    ------
    ValueError
        If ``d`` is neither "directed" nor "undirected".
    """
    if d not in ("directed", "undirected"):
        raise ValueError(
            "d must be 'directed' or 'undirected', got %r" % (d,))

    with open(corpus, "r", encoding="utf-8") as f:
        lines = f.readlines()
    # A trailing empty line makes sure the last sentence is flushed.
    lines.append("")

    sentences = []
    current = []
    for l in lines:
        cleaned = l.strip()
        if len(cleaned) == 0:
            sentences.append(current)
            current = []
        else:
            current.append(tuple(cleaned.split("\t")))
    lines_parsed = sentences

    def extract(sentence):
        # (id, parent id, word, tag) per token, preceded by an artificial root.
        reduced = [(0, 0, "ROOT", "Z")] + [
            (int(token[0]), int(token[6]), token[1], token[4])
            for token in sentence]
        return reduced

    def remove_special(sentence):
        def first_special(sent):
            # First token tagged "Z" that is not a hyphen or percent sign.
            for word in sent:
                if word[2] == "--" or word[2] == "-" or word[2] == "%":
                    continue
                if word[3] == "Z":
                    return word[0], word[1]
            return ()

        def rename(name):
            if name == "--" or name == "-":
                return "HYPHEN"
            elif name == "%":
                return "PERCENT"
            else:
                return name

        reduced = sentence
        to_replace = first_special(reduced)
        while to_replace:
            new_reduced = []
            is_first = True
            for word in reduced:
                if word[0] == to_replace[0]:
                    # Drop the special token itself.
                    continue
                if word[1] == to_replace[0]:
                    if is_first:
                        # The first child inherits the removed token's parent
                        # (or points to itself when the removed token was
                        # its own parent); later children attach to it.
                        is_first = False
                        parent = to_replace[1]
                        parent_rest = word[0]
                        if to_replace[0] == to_replace[1]:
                            parent = word[0]
                        new_reduced.append(
                            (word[0], parent, rename(word[2]), word[3]))
                    else:
                        new_reduced.append(
                            (word[0], parent_rest, rename(word[2]), word[3]))
                else:
                    new_reduced.append(
                        (word[0], word[1], rename(word[2]), word[3]))
            reduced = new_reduced
            to_replace = first_special(reduced)
        return reduced

    reduced_sentences = [remove_special(extract(sent))
                         for sent in lines_parsed]

    syntax_edges = dict()
    for sentence in reduced_sentences:
        name_map = dict()
        for word in sentence:
            name_map[word[0]] = word[2]
        for word in sentence:
            edge = (name_map[word[1]], name_map[word[0]])
            syntax_edges[edge] = syntax_edges.get(edge, 0) + 1

    syntax_list = [(k[0], k[1], v) for (k, v) in syntax_edges.items()]

    if d == "directed":
        g = nx.DiGraph()
    else:
        g = nx.Graph()

    out_path = corpus.rsplit(".", 1)[0] + "_syntax.edges"
    if w == "unweighted":
        # add_edges_from treats the third tuple item as an attribute dict,
        # so the integer counts must be stripped for the unweighted graph.
        g.add_edges_from((u, v) for u, v, _ in syntax_list)
        nx.write_edgelist(g, out_path)
    elif w == "weighted":
        g.add_weighted_edges_from(syntax_list)
        nx.write_weighted_edgelist(g, out_path)

    return g
def cooccurrence_net(corpus, delimiter_list, d="directed", w="weighted",
                     window=1, lower="Yes"):
    """Creates co-occurrence network from text file.

    Each word is linked to up to ``window`` preceding words; the backward
    scan stops at the start of the text or at a preceding word whose last
    character is a delimiter.  The result is also written to
    ``<corpus without extension>_coocurrence.edges``.

    Parameters
    ----------
    corpus : file
        original text file from which the network will be created
    delimiter_list : list
        list of delimiters
    d : directed or undirected
        type of graph that will be created
    w : weighted or unweighted
        if weighted is selected, the weight of a link is the number of
        times the two words co-occur; any other value than these two adds
        no edges and writes no file
    window : int
        number of preceding words each word is linked to
    lower : Yes or No
        defines whether all characters in a text will be changed to lower
        or not

    Returns
    -------
    The co-occurrence graph (``nx.DiGraph`` or ``nx.Graph``).

    Raises
    ------
    ValueError
        If ``lower`` or ``d`` is not one of the supported values.
    """
    if lower not in ("Yes", "No"):
        raise ValueError("lower must be 'Yes' or 'No', got %r" % (lower,))
    if d not in ("directed", "undirected"):
        raise ValueError(
            "d must be 'directed' or 'undirected', got %r" % (d,))

    with open(corpus, "r", encoding="utf-8") as f:
        text = f.read()
    if lower == "Yes":
        text = text.lower()
    c_list = text.split()

    g = nx.DiGraph() if d == "directed" else nx.Graph()
    delimiters = "".join(delimiter_list)

    def window_pairs():
        # Yield (preceding word, current word) pairs inside the window.
        for i, word in enumerate(c_list):
            target = word.strip(delimiters)
            for j in range(1, window + 1):
                if i - j >= 0 and c_list[i - j][-1] not in delimiter_list:
                    yield c_list[i - j], target
                else:
                    break

    out_path = corpus.rsplit(".", 1)[0] + "_coocurrence.edges"
    if w == "unweighted":
        for source, target in window_pairs():
            g.add_edge(source, target)
        nx.write_edgelist(g, out_path)
    elif w == "weighted":
        for source, target in window_pairs():
            if g.has_edge(source, target):
                g[source][target]['weight'] += 1
            else:
                g.add_edge(source, target, weight=1)
        nx.write_weighted_edgelist(g, out_path)

    return g