def pajek_to_files(name, url, pajek_lines):
    """Parse Pajek-format text, write an edge-list CSV and a node-id mapping
    CSV under ../pajek_networks/, and record the graph in the database.

    Parameters
    ----------
    name : str
        Human-readable network name stored in the DB.
    url : str
        Source URL; its last path component names the output files.
    pajek_lines : str
        Raw Pajek file contents. Empty/None input is a no-op.
    """
    if not pajek_lines:
        return
    try:
        check_matrix = pajek_lines.find('*matrix')
        if check_matrix != -1:
            # NOTE: '*matrix' is 7 characters, so +6 leaves the trailing 'x'
            # in the slice. That leftover fragment carries no digits, so the
            # digit filter below blanks and removes it — harmless in practice.
            pajek_lines = pajek_lines[check_matrix + 6:].strip(' ').strip('\r').strip('\n')
            matrix_lines = pajek_lines.split('\r')
            numbers_exp = re.compile(r'[0-9]')
            append = ";"
            for i in range(len(matrix_lines)):
                if numbers_exp.search(matrix_lines[i]):
                    # keep data rows, terminated with ';' for matrix syntax
                    matrix_lines[i] = matrix_lines[i].strip('\n') + append
                else:
                    # blank non-data rows so they can be filtered out below
                    matrix_lines[i] = ''
            # BUG FIX: was `x is not ''` — an identity check against a string
            # literal that only worked by CPython interning accident (and
            # raises SyntaxWarning on modern Pythons). Use real comparison.
            matrix_lines = [line for line in matrix_lines if line != '']
            adj_matrix = " ".join(matrix_lines)
            adj_matrix = adj_matrix[:-1]  # drop the final trailing ';'
            # np.matrix accepts the "a b; c d" string form (np.array does not)
            G = nx.from_numpy_array(np.matrix(adj_matrix))
        else:
            G = nx.parse_pajek(pajek_lines)
        if not nx.is_empty(G):
            old_attributes = list(G.nodes)
            G = nx.convert_node_labels_to_integers(G)
            # pair each original label with its new integer id
            id_mapping = [[old, str(new)] for old, new in zip(old_attributes, G.nodes)]
            file_stub = url.split('/')[-1]
            # BUG FIX: mapping file was never closed; `with` guarantees it
            with open('../pajek_networks/node_id_mappings/mapping_' + file_stub + '.csv',
                      'w', newline='') as mapping_file:
                mapping_file_writer = csv.writer(mapping_file)
                mapping_file_writer.writerow(['id', 'name'])
                for tup in id_mapping:
                    mapping_file_writer.writerow(list(tup))
            nx.write_edgelist(G, '../pajek_networks/edge_lists/' + file_stub + '.csv',
                              delimiter=',')
            utils.insert_into_db(
                name, url,
                '/pajek_networks/edge_lists/' + file_stub + '.csv',
                '/pajek_networks/node_id_mappings/mapping_' + file_stub + '.csv',
                G.is_directed(), G.is_multigraph(),
                int(G.number_of_nodes()), int(nx.number_of_selfloops(G)))
    except Exception as e:
        traceback.print_exc()
        print(e)
        print("Couldn't parse " + url)
def node_id_write(G, url, edge_list_path, node_id_path, name):
    """Relabel G's nodes to consecutive integers, persist the old-label ->
    new-id mapping and the edge list, and record the graph in the database.

    Parameters
    ----------
    G : networkx graph
        Graph whose nodes get relabeled.
    url : str
        Source URL stored in the DB row.
    edge_list_path, node_id_path : str
        Output directory prefixes (must end with a path separator).
    name : str
        Base filename (without extension) for both output CSVs.

    Returns
    -------
    The relabeled graph.
    """
    old_attributes = list(G.nodes)
    G = nx.convert_node_labels_to_integers(G)
    # pair each original label with its new integer id
    id_mapping = [[old, str(new)] for old, new in zip(old_attributes, G.nodes)]
    # IDIOM FIX: context manager closes the file even if a write raises
    # (the old manual close leaked the handle on error).
    with open(node_id_path + name + '.csv', 'w', newline='') as mapping_file:
        mapping_file_writer = csv.writer(mapping_file)
        mapping_file_writer.writerow(['id', 'name'])
        for tup in id_mapping:
            mapping_file_writer.writerow(list(tup))
    nx.write_edgelist(G, edge_list_path + name + '.csv')
    utils.insert_into_db(name, url,
                         edge_list_path + name + '.csv',
                         node_id_path + name + '.csv',
                         G.is_directed(), G.is_multigraph(),
                         int(G.number_of_nodes()), int(nx.number_of_selfloops(G)))
    return G
# --- tail of the `networks` url dictionary (the opening `networks = {` is
# outside this excerpt) ---
'Japanese Word Adjacency': ['http://www.weizmann.ac.il/mcb/UriAlon/sites/mcb.UriAlon/files/uploads/CollectionsOfComplexNetwroks/japanesebookinter_st.txt'],
'Protein Structure': [
    'http://www.weizmann.ac.il/mcb/UriAlon/sites/mcb.UriAlon/files/uploads/CollectionsOfComplexNetwroks/1a4jinter_st.txt',
    'http://www.weizmann.ac.il/mcb/UriAlon/sites/mcb.UriAlon/files/uploads/CollectionsOfComplexNetwroks/1eawinter_st.txt',
    'http://www.weizmann.ac.il/mcb/UriAlon/sites/mcb.UriAlon/files/uploads/CollectionsOfComplexNetwroks/1aorinter_st.txt']
}
# The E. Coli entry is handled specially: it is the only network whose list
# holds two URLs — [edge-list url, node-id-mapping url].
edge_list_path = networks['E. Coli Transcription'][0]
mapping_path = networks['E. Coli Transcription'][1]
with urllib.request.urlopen(edge_list_path) as e_coli_fp:
    lines = e_coli_fp.read()
G = nx.read_weighted_edgelist(io.BytesIO(lines), delimiter=' ')
nx.write_edgelist(G, '../uri_alon_networks/edge_lists/e_coli_interaction.txt', delimiter=',')
with urllib.request.urlopen(mapping_path) as mapping_fp:
    mapping_lines = mapping_fp.read().decode('utf-8').split('\n')
with open('../uri_alon_networks/node_id_mappings/mapping_e_coli_interaction.txt', 'w') as e_coli_mapping_fp:
    # rewrite space-separated mapping rows as comma-separated lines
    e_coli_mapping_fp.writelines(map(lambda x: x.replace(' ', ',') + '\n', mapping_lines))
utils.insert_into_db('E. Coli Transcription', edge_list_path,
                     '/uri_alon_networks/edge_lists/e_coli_interaction.txt',
                     '/uri_alon_networks/node_id_mappings/mapping_e_coli_interaction.txt',
                     G.is_directed(), G.is_multigraph(),
                     int(G.number_of_nodes()), int(nx.number_of_selfloops(G)))
# All remaining networks: download each URL, write the edge list, and record
# it with no node-id mapping ('N/A' in the DB).
for net in networks.keys():
    if net != 'E. Coli Transcription':
        for subnet in networks[net]:
            with urllib.request.urlopen(subnet) as subnet_fp:
                lines = subnet_fp.read()
            G = nx.read_weighted_edgelist(io.BytesIO(lines), delimiter=' ')
            nx.write_edgelist(G, '../uri_alon_networks/edge_lists/' + subnet.split('/')[-1], delimiter=',')
            # DB name = network name + filename stem; url column = name + url
            utils.insert_into_db(net + subnet.split('/')[-1].split('.')[-2], net + subnet,
                                 '../uri_alon_networks/edge_lists/' + subnet.split('/')[-1],
                                 'N/A', G.is_directed(), G.is_multigraph(),
                                 int(G.number_of_nodes()), int(nx.number_of_selfloops(G)))
# Walk every zip link on the (already-parsed) Arenas data page, extract the
# graph file from each archive, and hand it off for conversion + DB insert.
# `parsed_html`, `data_url`, and helpers come from earlier in this file.
for link in parsed_html.find_all('a'):
    if 'zip' in link.get('href'):
        url = data_url + link.get('href')
        pajek_lines = []
        graph_zipped = utils.get_zip_fp(url)
        for file in graph_zipped.infolist():
            ext = file.filename[-3:].lower()
            if ext == "net" or ext == "paj":
                pajek_lines = graph_zipped.read(file.filename).decode('utf-8')
                if 'jazz' in file.filename:
                    # The jazz file has a 3-line header and tab/space-padded
                    # columns; normalize it to plain space-separated rows.
                    pajek_lines = "\n".join(
                        list(
                            map(
                                lambda x: " ".join(
                                    x.strip(' ').replace('\t', '').split(' ')),
                                pajek_lines.split('\n')[3:])))
                    # NOTE(review): parse_edgelist is given a str, so it will
                    # iterate characters rather than lines, and this G is
                    # never used afterwards — confirm whether this call is
                    # dead code.
                    G = nx.parse_edgelist(pajek_lines)
                utils.pajek_to_files(link.string, url, pajek_lines, '/arenas_networks')
            elif ext == 'txt':
                G = nx.read_weighted_edgelist(
                    io.BytesIO(graph_zipped.read(file.filename)))
                nx.write_edgelist(
                    G, '../arenas_networks/edge_lists/' +
                    file.filename.replace('txt', 'csv'))
                utils.insert_into_db(
                    file.filename, url,
                    '/arenas_networks/edge_lists/' +
                    file.filename.replace('txt', 'csv'), '',
                    G.is_directed(), G.is_multigraph(),
                    int(G.number_of_nodes()), int(nx.number_of_selfloops(G)))
import networkx as nx
from glob import glob
import graph_info_csv_helpers as utils

# Convert the pre-built jazz GraphML file into a CSV edge list and register it.
ucinet_graph_ml_path = '../arenas_networks/'

for graph_ml_file in ['../arenas_networks/jazz.graphml']:
    G = nx.read_graphml(graph_ml_file)
    # filename stem, e.g. 'jazz' from '../arenas_networks/jazz.graphml'
    stem = graph_ml_file.split('/')[-1].split('.')[0]
    edge_list_file = '../arenas_networks/edge_lists/' + stem + '.csv'
    nx.write_edgelist(G, edge_list_file, delimiter=',')
    # BUG FIX: the DB row previously recorded '../dl_networks/edge_lists/...'
    # (copy-paste from the ucinet script) even though the edge list is written
    # under '../arenas_networks/' — record the path actually written.
    utils.insert_into_db(
        stem + '.csv',
        "http://deim.urv.cat/~alexandre.arenas/data/welcome.htm",
        edge_list_file,
        '',
        G.is_directed(), G.is_multigraph(),
        int(G.number_of_nodes()), int(nx.number_of_selfloops(G)))
from glob import glob
import csv  # BUG FIX: csv.writer was used below without csv being imported
import networkx as nx
import graph_info_csv_helpers as utils

# Convert every C. elegans edge-list file to an integer-labeled edge list plus
# a node-id mapping CSV, and register each graph in the database.
base_dir = '../c_elegans_networks/xls_files/*'
node_id_path = '../c_elegans_networks/node_id_mappings/'
edge_list_path = '../c_elegans_networks/edge_lists/'

for edge_list in glob(base_dir):
    G = nx.read_weighted_edgelist(edge_list, delimiter=',', create_using=nx.MultiDiGraph)
    old_attributes = list(G.nodes)
    G = nx.convert_node_labels_to_integers(G)
    name = edge_list.split('/')[-1]
    # pair each original label with its new integer id
    id_mapping = [[old, str(new)] for old, new in zip(old_attributes, G.nodes)]
    # context manager closes the file even if a write raises
    with open(node_id_path + name, 'w', newline='') as mapping_file:
        mapping_file_writer = csv.writer(mapping_file)
        mapping_file_writer.writerow(['id', 'name'])
        for tup in id_mapping:
            mapping_file_writer.writerow(list(tup))
    nx.write_edgelist(G, edge_list_path + name, delimiter=',')
    # BUG FIX: the mapping is written to node_id_path + name, but the DB row
    # used to record node_id_path + name + '.csv' (a file that doesn't exist).
    utils.insert_into_db(name, 'https://www.wormatlas.org/neuronalwiring.html',
                         edge_list_path + name, node_id_path + name,
                         G.is_directed(), G.is_multigraph(),
                         int(G.number_of_nodes()), int(nx.number_of_selfloops(G)))
# Parse one Newman GML file into (attribute dict, graph) and persist it.
# `gml_lines`, `file`, `name`, and `url` come from the enclosing loop.
# BUG FIX: the parsed attribute dict was bound to the name `dict`, shadowing
# the builtin; renamed to `parsed`.
parsed, G = gml.parse_gml(gml_lines, label='id')
stem = file.filename.split('.')[0]
# context manager closes the mapping file even if a write raises
with open('../newman_networks/node_id_mappings/mapping_' + stem + '.csv',
          'w', newline='') as mapping_file:
    mapping_file_writer = csv.writer(mapping_file)
    # header row = the attribute keys of the first node record
    mapping_file_writer.writerow(parsed['node'][0].keys())
    for node in parsed['node']:
        G.add_node(node['id'])
        mapping_file_writer.writerow(node.values())
for edge in parsed['edge']:
    if 'value' in edge.keys():
        # 'value' is the GML edge-weight attribute
        G.add_weighted_edges_from([
            (edge['source'], edge['target'], edge['value'])
        ])
    else:
        G.add_edge(edge['source'], edge['target'])
nx.write_edgelist(G, '../newman_networks/edge_lists/' + stem + '.csv', delimiter=',')
utils.insert_into_db(
    name, url,
    '/newman_networks/edge_lists/' + stem + '.csv',
    '/newman_networks/node_id_mappings/mapping_' + stem + '.csv',
    G.is_directed(), G.is_multigraph(),
    int(G.number_of_nodes()), int(nx.number_of_selfloops(G)))
import networkx as nx
from glob import glob
import graph_info_csv_helpers as utils

# Re-save every Barabasi edge-list file as a comma-delimited .csv and record
# each resulting graph in the database.
barabasi_path = '../barabasi_networks/edge_lists/*'
for edge_file in glob(barabasi_path):
    G = nx.read_weighted_edgelist(edge_file)
    csv_name = edge_file.split('/')[-1].split('.')[0] + '.csv'
    csv_path = '../barabasi_networks/edge_lists/' + csv_name
    nx.write_edgelist(G, csv_path, delimiter=',')
    utils.insert_into_db(
        csv_name,
        "http://networksciencebook.com/translations/en/resources/data.html",
        csv_path,
        '',
        G.is_directed(), G.is_multigraph(),
        int(G.number_of_nodes()), int(nx.number_of_selfloops(G)))
import gzip
import networkx as nx
import graph_info_csv_helpers as utils
import urllib.request

# Facebook wall-post and friendship-link datasets from MPI-SWS.
facebook_links = "http://socialnetworks.mpi-sws.mpg.de/data/facebook-links.txt.gz"
facebook_post = "http://socialnetworks.mpi-sws.mpg.de/data/facebook-wall.txt.gz"
networks = [facebook_post, facebook_links]

for net in networks:
    # Download, gunzip, decode, and split into lines (trailing newline dropped).
    with urllib.request.urlopen(net) as net_fp:
        ungzipped = gzip.open(net_fp).read().decode('utf-8').strip('\n').split('\n')
    # '\N' is presumably the dataset's NULL marker — blank it out so the
    # edge-list parser only sees the real fields.
    cleaned = [row.replace('\\N', '') for row in ungzipped]
    name = net.split('/')[-1].split('.')[0]
    G = nx.read_weighted_edgelist(cleaned)
    out_path = '../max_planck_networks/edge_lists/' + name + '.csv'
    nx.write_edgelist(G, out_path, delimiter=',')
    utils.insert_into_db(
        name, net, out_path, '',
        G.is_directed(), G.is_multigraph(),
        int(G.number_of_nodes()), int(nx.number_of_selfloops(G)))
# Scan the archive for '.edges' files, convert each to an integer-labeled
# edge list, write a metadata JSON, and register the graph in the database.
# `zip_dir`, `metadata`, `name`, `url`, and the path variables come from
# earlier in this file.
for other_files in zip_dir.infolist():
    ext = other_files.filename[-5:].lower()
    if ext == 'edges':
        G = nx.read_weighted_edgelist(
            io.BytesIO(zip_dir.read(other_files.filename)))
        # NOTE(review): this passes 4 args, but a node_id_write defined
        # elsewhere takes (G, url, edge_list_path, node_id_path, name) —
        # confirm which signature is in scope here; a mismatch raises
        # TypeError (caught below).
        G = node_id_write(G, edge_list_path, node_id_path, name)
        nx.write_edgelist(G, edge_list_path + name + '.csv')
        metadata[
            'file_path'] = edge_list_path + name + '.csv'
        metadata['cleaned'] = True
        # NOTE(review): 'metadata_' is prefixed to the whole relative path,
        # yielding e.g. 'metadata_../<dir>/<name>.json' in the current
        # directory — confirm this is the intended location.
        with open(
                'metadata_' + edge_list_path + name + '.json',
                'w') as metadata_fp:
            json.dump(metadata, metadata_fp)
        utils.insert_into_db(
            name, url, edge_list_path + name + '.csv',
            node_id_path + name + '.csv',
            G.is_directed(), G.is_multigraph(),
            int(G.number_of_nodes()),
            int(nx.number_of_selfloops(G)))
# The except clauses below belong to a try: that begins before this excerpt.
except ValueError as e:
    print(url)
    print(e)
except TypeError as e:
    print(url)
    print(e)
except Exception as e:
    print(e)
    print("I'm not sure what went wrong.")
import networkx as nx
from glob import glob
import graph_info_csv_helpers as utils

# UCINET graphs were pre-converted to GraphML; turn each one into a
# comma-delimited CSV edge list and register it in the database.
ucinet_graph_ml_path = '../dl_networks/graph_ml/*'
for graph_ml_file in glob(ucinet_graph_ml_path):
    G = nx.read_graphml(graph_ml_file)
    stem = graph_ml_file.split('/')[-1].split('.')[0]
    target = '../dl_networks/edge_lists/' + stem + '.csv'
    nx.write_edgelist(G, target, delimiter=',')
    utils.insert_into_db(
        stem + '.csv',
        "http://vlado.fmf.uni-lj.si/pub/networks/data/ucinet/",
        target,
        '',
        G.is_directed(), G.is_multigraph(),
        int(G.number_of_nodes()), int(nx.number_of_selfloops(G)))