Ejemplo n.º 1
0
def pajek_to_files(name, url, pajek_lines):
    """Parse a Pajek-format network and persist it.

    Handles both the ``*matrix`` adjacency-matrix variant and the regular
    Pajek arc/edge format.  Writes a node-id mapping CSV and an edge-list
    CSV under ``../pajek_networks/`` and registers the graph via
    ``utils.insert_into_db``.

    Parameters
    ----------
    name : str
        Human-readable network name, stored in the database.
    url : str
        Source URL; its last path component names the output files.
    pajek_lines : str
        Raw Pajek file contents.  Falsy input is ignored.
    """
    if not pajek_lines:
        return
    try:
        check_matrix = pajek_lines.find('*matrix')
        if check_matrix != -1:
            # Adjacency-matrix variant: drop the '*matrix' header, then turn
            # each numeric row into "a b c;" so the joined result is a
            # MATLAB-style matrix literal that np.matrix() can parse.
            pajek_lines = pajek_lines[check_matrix + 6:].strip(' ').strip('\r').strip('\n')
            matrix_lines = pajek_lines.split('\r')
            numbers_exp = re.compile(r'[0-9]')
            for i in range(len(matrix_lines)):
                if numbers_exp.search(matrix_lines[i]):
                    matrix_lines[i] = matrix_lines[i].strip('\n') + ";"
                else:
                    matrix_lines[i] = ''
            # BUG FIX: the original filtered with `x is not ''` — an identity
            # test that is implementation-dependent and a SyntaxWarning on
            # modern CPython.  Compare by value instead.
            matrix_lines = [row for row in matrix_lines if row != '']
            adj_matrix = " ".join(matrix_lines)
            adj_matrix = adj_matrix[:-1]  # drop the trailing ';'
            # NOTE: np.matrix is deprecated, but it is kept here because it
            # parses the "1 2; 3 4" string form; np.array does not.
            G = nx.from_numpy_array(np.matrix(adj_matrix))
        else:
            G = nx.parse_pajek(pajek_lines)
        if nx.is_empty(G):
            return
        old_attributes = list(G.nodes)
        G = nx.convert_node_labels_to_integers(G)
        # Pair each original label with its new integer id.
        id_mapping = [[old, str(new)] for old, new in zip(old_attributes, G.nodes)]
        file_stem = url.split('/')[-1]
        # BUG FIX: close the mapping file (the original leaked the handle).
        with open('../pajek_networks/node_id_mappings/mapping_' + file_stem + '.csv',
                  'w', newline='') as mapping_file:
            mapping_file_writer = csv.writer(mapping_file)
            mapping_file_writer.writerow(['id', 'name'])
            for tup in id_mapping:
                mapping_file_writer.writerow(list(tup))
        nx.write_edgelist(G, '../pajek_networks/edge_lists/' + file_stem + '.csv',
                          delimiter=',')
        # DB paths are stored without the leading '..' used for the writes.
        utils.insert_into_db(name, url, '/pajek_networks/edge_lists/' + file_stem + '.csv',
                             '/pajek_networks/node_id_mappings/mapping_' + file_stem + '.csv',
                             G.is_directed(),
                             G.is_multigraph(), int(G.number_of_nodes()), int(nx.number_of_selfloops(G)))
    except Exception as e:
        traceback.print_exc()
        print(e)
        print("Couldn't parse " + url)
def node_id_write(G, url, edge_list_path, node_id_path, name):
    """Relabel *G*'s nodes to integers, write the id mapping and edge list
    to ``<node_id_path><name>.csv`` / ``<edge_list_path><name>.csv``,
    register the graph in the database, and return the relabelled graph."""
    original_labels = list(G.nodes)
    G = nx.convert_node_labels_to_integers(G)
    pairs = [[old, str(new)] for old, new in zip(original_labels, G.nodes)]
    with open(node_id_path + name + '.csv', 'w', newline='') as mapping_fp:
        writer = csv.writer(mapping_fp)
        writer.writerow(['id', 'name'])
        writer.writerows(pairs)
    nx.write_edgelist(G, edge_list_path + name + '.csv')
    utils.insert_into_db(name, url, edge_list_path + name + '.csv',
                         node_id_path + name + '.csv', G.is_directed(),
                         G.is_multigraph(), int(G.number_of_nodes()),
                         int(nx.number_of_selfloops(G)))
    return G
    'Japanese Word Adjacency': ['http://www.weizmann.ac.il/mcb/UriAlon/sites/mcb.UriAlon/files/uploads/CollectionsOfComplexNetwroks/japanesebookinter_st.txt'],
    'Protein Structure': [
        'http://www.weizmann.ac.il/mcb/UriAlon/sites/mcb.UriAlon/files/uploads/CollectionsOfComplexNetwroks/1a4jinter_st.txt',
        'http://www.weizmann.ac.il/mcb/UriAlon/sites/mcb.UriAlon/files/uploads/CollectionsOfComplexNetwroks/1eawinter_st.txt',
        'http://www.weizmann.ac.il/mcb/UriAlon/sites/mcb.UriAlon/files/uploads/CollectionsOfComplexNetwroks/1aorinter_st.txt']
}

# The E. Coli transcription entry is handled specially: it ships a separate
# node-id mapping file alongside its edge list.
edge_list_path = networks['E. Coli Transcription'][0]
mapping_path = networks['E. Coli Transcription'][1]

with urllib.request.urlopen(edge_list_path) as e_coli_fp:
    lines = e_coli_fp.read()
G = nx.read_weighted_edgelist(io.BytesIO(lines), delimiter=' ')
nx.write_edgelist(G, '../uri_alon_networks/edge_lists/e_coli_interaction.txt', delimiter=',')

# Rewrite the space-separated mapping file as comma-separated lines.
with urllib.request.urlopen(mapping_path) as mapping_fp:
    mapping_lines = mapping_fp.read().decode('utf-8').split('\n')
with open('../uri_alon_networks/node_id_mappings/mapping_e_coli_interaction.txt', 'w') as e_coli_mapping_fp:
    e_coli_mapping_fp.writelines(row.replace(' ', ',') + '\n' for row in mapping_lines)

utils.insert_into_db('E. Coli Transcription', edge_list_path, '/uri_alon_networks/edge_lists/e_coli_interaction.txt',
                     '/uri_alon_networks/node_id_mappings/mapping_e_coli_interaction.txt', G.is_directed(),
                     G.is_multigraph(), int(G.number_of_nodes()), int(nx.number_of_selfloops(G)))

# Every other network is a plain weighted edge list with no mapping file.
for net in networks:
    if net == 'E. Coli Transcription':
        continue
    for subnet in networks[net]:
        with urllib.request.urlopen(subnet) as subnet_fp:
            lines = subnet_fp.read()
        G = nx.read_weighted_edgelist(io.BytesIO(lines), delimiter=' ')
        file_name = subnet.split('/')[-1]
        nx.write_edgelist(G, '../uri_alon_networks/edge_lists/' + file_name, delimiter=',')
        utils.insert_into_db(net + file_name.split('.')[-2], net + subnet,
                             '../uri_alon_networks/edge_lists/' + file_name, 'N/A',
                             G.is_directed(), G.is_multigraph(),
                             int(G.number_of_nodes()), int(nx.number_of_selfloops(G)))
Ejemplo n.º 4
0
# Walk every anchor on the scraped page and ingest any linked zip archives.
for link in parsed_html.find_all('a'):
    if 'zip' not in link.get('href'):
        continue
    url = data_url + link.get('href')
    pajek_lines = []
    graph_zipped = utils.get_zip_fp(url)
    for member in graph_zipped.infolist():
        ext = member.filename[-3:].lower()
        if ext in ("net", "paj"):
            pajek_lines = graph_zipped.read(member.filename).decode('utf-8')
            if 'jazz' in member.filename:
                # The jazz file carries a 3-line header and tab-padded
                # columns; normalise it to plain space-separated edge lines.
                cleaned_rows = []
                for row in pajek_lines.split('\n')[3:]:
                    cleaned_rows.append(
                        " ".join(row.strip(' ').replace('\t', '').split(' ')))
                pajek_lines = "\n".join(cleaned_rows)
                G = nx.parse_edgelist(pajek_lines)
            utils.pajek_to_files(link.string, url, pajek_lines,
                                 '/arenas_networks')
        elif ext == 'txt':
            G = nx.read_weighted_edgelist(
                io.BytesIO(graph_zipped.read(member.filename)))
            nx.write_edgelist(
                G, '../arenas_networks/edge_lists/' +
                member.filename.replace('txt', 'csv'))
            utils.insert_into_db(
                member.filename, url, '/arenas_networks/edge_lists/' +
                member.filename.replace('txt', 'csv'), '', G.is_directed(),
                G.is_multigraph(), int(G.number_of_nodes()),
                int(nx.number_of_selfloops(G)))
import networkx as nx
from glob import glob
import graph_info_csv_helpers as utils

ucinet_graph_ml_path = '../arenas_networks/'

# Convert the jazz GraphML export to a comma-delimited edge list and record it.
for graph_ml_file in ['../arenas_networks/jazz.graphml']:
    G = nx.read_graphml(graph_ml_file)
    # Derive e.g. 'jazz.csv' from '../arenas_networks/jazz.graphml'.
    csv_name = graph_ml_file.split('/')[-1].split('.')[0] + '.csv'
    edge_list_file = '../arenas_networks/edge_lists/' + csv_name
    nx.write_edgelist(G, edge_list_file, delimiter=',')
    # BUG FIX: the DB row previously pointed at '../dl_networks/edge_lists/'
    # (copy-pasted from the UCINET script) although the edge list is actually
    # written under '../arenas_networks/edge_lists/'.
    utils.insert_into_db(
        csv_name,
        "http://deim.urv.cat/~alexandre.arenas/data/welcome.htm",
        edge_list_file, '',
        G.is_directed(), G.is_multigraph(), int(G.number_of_nodes()),
        int(nx.number_of_selfloops(G)))
from glob import glob
# BUG FIX: `csv` is used below (csv.writer) but was never imported here.
import csv

import networkx as nx
import graph_info_csv_helpers as utils

base_dir = '../c_elegans_networks/xls_files/*'
node_id_path = '../c_elegans_networks/node_id_mappings/'
edge_list_path = '../c_elegans_networks/edge_lists/'

# For each C. elegans edge-list file: relabel nodes to integers, write the
# id mapping and the relabelled edge list, and register the graph.
for edge_list in glob(base_dir):
    G = nx.read_weighted_edgelist(edge_list, delimiter=',', create_using=nx.MultiDiGraph)
    old_labels = list(G.nodes)
    G = nx.convert_node_labels_to_integers(G)
    name = edge_list.split('/')[-1]
    mapping_path = node_id_path + name
    with open(mapping_path, 'w', newline='') as mapping_file:
        mapping_writer = csv.writer(mapping_file)
        mapping_writer.writerow(['id', 'name'])
        for old, new in zip(old_labels, G.nodes):
            mapping_writer.writerow([old, str(new)])
    nx.write_edgelist(G, edge_list_path + name, delimiter=',')
    # BUG FIX: the DB previously recorded the mapping as
    # `node_id_path + name + '.csv'` even though the file is written to
    # `node_id_path + name`; record the path that actually exists.
    utils.insert_into_db(name, 'https://www.wormatlas.org/neuronalwiring.html',
                         edge_list_path + name,
                         mapping_path,
                         G.is_directed(),
                         G.is_multigraph(), int(G.number_of_nodes()),
                         int(nx.number_of_selfloops(G)))
Ejemplo n.º 7
0
                        # NOTE(review): fragment — the enclosing loop/try
                        # begins above this excerpt.  `dict` shadows the
                        # builtin; it is the attribute mapping parsed from the
                        # GML source (it is indexed with 'node' and 'edge'
                        # below).
                        dict, G = gml.parse_gml(gml_lines, label='id')
                    # One mapping row per GML node, with a header made from
                    # the attribute keys of the first node.
                    mapping_file = open(
                        '../newman_networks/node_id_mappings/mapping_' +
                        file.filename.split('.')[0] + '.csv',
                        'w',
                        newline='')
                    mapping_file_writer = csv.writer(mapping_file)
                    mapping_file_writer.writerow(dict['node'][0].keys())
                    for node in dict['node']:
                        G.add_node(node['id'])
                        mapping_file_writer.writerow(node.values())
                    # Rebuild the edge set, keeping weights when a 'value'
                    # attribute is present.
                    for edge in dict['edge']:
                        if 'value' in edge.keys():
                            G.add_weighted_edges_from([
                                (edge['source'], edge['target'], edge['value'])
                            ])
                        else:
                            G.add_edge(edge['source'], edge['target'])
                    nx.write_edgelist(G,
                                      '../newman_networks/edge_lists/' +
                                      file.filename.split('.')[0] + '.csv',
                                      delimiter=',')
                    mapping_file.close()
                    # DB paths are stored without the leading '..' used for
                    # the actual file writes.
                    utils.insert_into_db(
                        name, url, '/newman_networks/edge_lists/' +
                        file.filename.split('.')[0] + '.csv',
                        '/newman_networks/node_id_mappings/mapping_' +
                        file.filename.split('.')[0] + '.csv', G.is_directed(),
                        G.is_multigraph(), int(G.number_of_nodes()),
                        int(nx.number_of_selfloops(G)))
import networkx as nx
from glob import glob
import graph_info_csv_helpers as utils

# Re-export every Barabási edge list as a comma-delimited CSV and register it.
barabasi_path = '../barabasi_networks/edge_lists/*'

for edge_file in glob(barabasi_path):
    G = nx.read_weighted_edgelist(edge_file)
    csv_name = edge_file.split('/')[-1].split('.')[0] + '.csv'
    destination = '../barabasi_networks/edge_lists/' + csv_name
    nx.write_edgelist(G, destination, delimiter=',')
    utils.insert_into_db(csv_name,
                         "http://networksciencebook.com/translations/en/resources/data.html",
                         destination,
                         '', G.is_directed(),
                         G.is_multigraph(), int(G.number_of_nodes()), int(nx.number_of_selfloops(G)))
Ejemplo n.º 9
0
import gzip
import networkx as nx
import graph_info_csv_helpers as utils
import urllib.request

facebook_links = "http://socialnetworks.mpi-sws.mpg.de/data/facebook-links.txt.gz"
facebook_post = "http://socialnetworks.mpi-sws.mpg.de/data/facebook-wall.txt.gz"

networks = [facebook_post, facebook_links]

# Download each gzipped edge list, clean it, and register the graph.
for net in networks:
    # Decompress fully in memory; no temp file is written.
    with urllib.request.urlopen(net) as net_fp:
        raw_rows = gzip.open(net_fp).read().decode('utf-8').strip('\n').split('\n')
    # Blank out literal '\N' tokens (presumably NULL markers from the dump —
    # TODO confirm) so the edge-list parser can cope.
    cleaned = [row.replace('\\N', '') for row in raw_rows]
    name = net.split('/')[-1].split('.')[0]
    # networkx iterates the list of lines as if it were an open file.
    G = nx.read_weighted_edgelist(cleaned)
    nx.write_edgelist(G,
                      '../max_planck_networks/edge_lists/' + name + '.csv',
                      delimiter=',')
    utils.insert_into_db(name, net,
                         '../max_planck_networks/edge_lists/' + name + '.csv',
                         '', G.is_directed(), G.is_multigraph(),
                         int(G.number_of_nodes()),
                         int(nx.number_of_selfloops(G)))
                    # NOTE(review): fragment — the enclosing loops and the
                    # `try:` matching the handlers below start above this
                    # excerpt; `zip_dir`, `metadata`, `edge_list_path`,
                    # `node_id_path`, `name` and `url` are bound there.
                    for other_files in zip_dir.infolist():
                        ext = other_files.filename[-5:].lower()
                        if ext == 'edges':
                            G = nx.read_weighted_edgelist(
                                io.BytesIO(zip_dir.read(other_files.filename)))
                            # NOTE(review): called with 4 args while the
                            # node_id_write defined elsewhere takes
                            # (G, url, edge_list_path, node_id_path, name) —
                            # confirm which signature is in scope here.
                            G = node_id_write(G, edge_list_path, node_id_path,
                                              name)
                            nx.write_edgelist(G,
                                              edge_list_path + name + '.csv')
                            # Record where the cleaned edge list landed, then
                            # persist the metadata as JSON beside it.
                            metadata[
                                'file_path'] = edge_list_path + name + '.csv'
                            metadata['cleaned'] = True
                            with open(
                                    'metadata_' + edge_list_path + name +
                                    '.json', 'w') as metadata_fp:
                                json.dump(metadata, metadata_fp)
                            utils.insert_into_db(
                                name, url, edge_list_path + name + '.csv',
                                node_id_path + name + '.csv', G.is_directed(),
                                G.is_multigraph(), int(G.number_of_nodes()),
                                int(nx.number_of_selfloops(G)))
    except ValueError as e:
        print(url)
        print(e)
    except TypeError as e:
        print(url)
        print(e)
    except Exception as e:
        print(e)
        print("I'm not sure what went wrong.")
Ejemplo n.º 11
0
import networkx as nx
from glob import glob
import graph_info_csv_helpers as utils

# Convert every UCINET GraphML export to a comma-delimited edge list CSV
# and record it in the database.
ucinet_graph_ml_path = '../dl_networks/graph_ml/*'

for graph_ml_file in glob(ucinet_graph_ml_path):
    G = nx.read_graphml(graph_ml_file)
    csv_name = graph_ml_file.split('/')[-1].split('.')[0] + '.csv'
    out_file = '../dl_networks/edge_lists/' + csv_name
    nx.write_edgelist(G, out_file, delimiter=',')
    utils.insert_into_db(csv_name,
                         "http://vlado.fmf.uni-lj.si/pub/networks/data/ucinet/",
                         out_file,
                         '', G.is_directed(),
                         G.is_multigraph(), int(G.number_of_nodes()), int(nx.number_of_selfloops(G)))