예제 #1
0
def create_background_network(save_name='background_network'):
    """
    Compose the individual database networks into one background network.

    Loads the Reactome FI, KEGG, HMDB, BioGrid and SIGNOR networks (all but
    Reactome are requested with ``fresh_download=True``), prints node/edge
    overlap statistics for every ordered pair of databases, composes the
    five networks, trims and standardizes the result and pickles it.

    Parameters
    ----------
    save_name : str
        Prefix of the output file; the network is written to
        ``'<save_name>.p.gz'``.

    Returns
    -------
    The composed network that was written to disk.
    """

    reactome_network = load_reactome_fi()
    kegg_network = load_all_of_kegg(fresh_download=True)
    hmdb_network = load_hmdb_network(fresh_download=True)
    biogrid_network = load_biogrid_network(fresh_download=True)
    signor_network = load_signor(fresh_download=True)

    def find_overlap(n1, n2):
        """Print shared and exclusive node/edge counts of two networks."""
        nodes1 = set(n1.nodes())
        nodes2 = set(n2.nodes())
        e1 = set(n1.edges())
        e2 = set(n2.edges())
        print("\tnode overlap = {}".format(len(nodes1.intersection(nodes2))))
        # "difference" is reported as: only in n1 | only in n2
        print("\tnode difference = {} | {}".format(
            len(nodes1.difference(nodes2)),
            len(nodes2.difference(nodes1)))
        )
        print("\tedge overlap = {}".format(len(e1.intersection(e2))))
        print("\tedge difference = {} | {}".format(len(e1.difference(e2)),
                                                   len(e2.difference(e1))))

    network_list = [hmdb_network, kegg_network, biogrid_network,
                    reactome_network, signor_network]
    names = ['hmdb', 'kegg', 'biogrid', 'reactome', 'signor']

    # Compare every database against every other one (both orderings).
    for net_a, name_a in zip(network_list, names):
        for net_b, name_b in zip(network_list, names):
            if name_a != name_b:
                print('{} : {}'.format(name_a, name_b))
                find_overlap(net_a, net_b)

    full_network = nt.compose_all(network_list)

    # Keep the returned network when one is given: the helper may hand back
    # a new graph instead of (or in addition to) trimming in place.
    trimmed = nt.delete_disconnected_network(full_network)
    if trimmed is not None:
        full_network = trimmed
    nt.standardize_edge_types(full_network)

    n_nodes = len(full_network.nodes)
    n_edges = len(full_network.edges)
    print("Background network {} nodes and {} edges".format(n_nodes, n_edges))

    nx.write_gpickle(full_network, '{}.p.gz'.format(save_name))
    return full_network
예제 #2
0
def _replace_hmdb_accessions(species_ids):
    """
    Swap known HMDB accessions for their main accession, in place.

    Every id in `species_ids` that starts with ``'HMDB'`` and is a key of
    ``cm.hmdb_accession_to_main`` is removed, and the first entry of its
    mapped value is added instead.
    """
    outdated = set(
        i for i in species_ids
        if i.startswith('HMDB') and i in cm.hmdb_accession_to_main
    )
    current = set(cm.hmdb_accession_to_main[i][0] for i in outdated)
    # Remove all old ids before adding the new ones so the result does not
    # depend on set iteration order.
    species_ids.difference_update(outdated)
    species_ids.update(current)


def build_network(seed_species, species='hsa', save_name=None,
                  all_measured_list=None, trim_source_sink=False,
                  use_reactome=True, use_hmdb=False,
                  use_biogrid=True, use_signor=True, verbose=False):
    """
    Construct a network from a list of gene names.

    The KEGG pathways containing any seed species are composed into a
    starting network, which is then optionally expanded with the selected
    databases, standardized, trimmed and (optionally) saved.

    Parameters
    ----------

    seed_species : list
        list of genes to construct network
    save_name : str, optional
        output name to save network. Written as ``'<save_name>.gml'`` and
        ``'<save_name>.p'``.
    species : str
        species of proteins ('hsa': human, 'mmu':murine)
    all_measured_list : list
        list of all species that should be considered in network. If None,
        the nodes of the KEGG-derived network are used.
    use_reactome : bool
        Add ReactomeFunctionalInteraction reaction to network
    use_biogrid : bool
        Add BioGrid reaction to network
    use_hmdb : bool
        Add HMDB reaction to network
    use_signor : bool
        Add SIGNOR reaction to network
    trim_source_sink : bool, optional
        Remove source and sink nodes if they are not measured in network
    verbose : bool
        Forwarded as ``verbose`` to the HMDB, Reactome, BioGrid and SIGNOR
        loaders. The KEGG mappings are always loaded with ``verbose=False``.

    Returns
    -------
    networkx.DiGraph
    """

    path_to_graph, node_to_path = load_kegg_mappings(species, verbose=False)

    seed_species = set(x.upper() for x in seed_species)
    _replace_hmdb_accessions(seed_species)

    seeds_in_kegg = seed_species.intersection(node_to_path)

    pathway_list = set()
    for seed in seeds_in_kegg:
        pathway_list.update(node_to_path[seed])

    # Pathways without any edge contribute nothing to the network.
    graph_list = []
    for each in pathway_list:
        tmp = path_to_graph[each]
        if len(tmp.edges) == 0:
            continue
        graph_list.append(tmp)
    end_network = nt.compose_all(graph_list)

    if all_measured_list is None:
        all_measured_set = set(i.upper() for i in end_network.nodes)
    else:
        all_measured_set = set(str(x).upper() for x in all_measured_list)

    all_measured_set.update(seed_species)
    _replace_hmdb_accessions(all_measured_set)

    # Pass `verbose` by keyword: some loaders also take `fresh_download`,
    # and a positional argument could be bound to that instead.
    networks_to_expand = []

    if use_hmdb:
        networks_to_expand.append(load_hmdb_network(verbose=verbose))

    if use_reactome:
        networks_to_expand.append(load_reactome_fi(verbose=verbose))

    if use_biogrid:
        networks_to_expand.append(load_biogrid_network(verbose=verbose))

    if use_signor:
        networks_to_expand.append(load_signor(verbose=verbose))

    if len(networks_to_expand) != 0:
        entire_expansion_network = nt.compose_all(networks_to_expand)
        end_network = expand_by_db(end_network, entire_expansion_network,
                                   all_measured_set)

    print("Trimming network")
    # makes all similar edge names the same
    nt.standardize_edge_types(end_network)
    # removes everything not connected to the largest graph
    end_network = nt.delete_disconnected_network(end_network)

    if trim_source_sink:
        # NOTE(review): this receives the raw `all_measured_list` (possibly
        # None, not upper-cased, HMDB ids not updated) rather than
        # `all_measured_set` -- confirm this is what the helper expects.
        end_network = nt.trim_sink_source_nodes(end_network, all_measured_list,
                                                remove_self_edge=True)
    if save_name is not None:
        nx.write_gml(end_network, '{}.gml'.format(save_name))
        nx.write_gpickle(end_network, '{}.p'.format(save_name))

    final_nodes = set(end_network.nodes)
    n_hits = len(seed_species.intersection(final_nodes))

    print('Network has {} nodes and {} edges'.format(len(final_nodes),
                                                     len(end_network.edges)))

    print("Found {} of {} seed species in network"
          "".format(n_hits, len(seed_species)))
    if all_measured_list is not None:
        n_measured_hits = len(set(all_measured_list).intersection(final_nodes))
        print("Found {} of {} background species in network"
              "".format(n_measured_hits, len(all_measured_list)))

    return end_network
예제 #3
0
def create_background_network(save_name='background_network',
                              fresh_download=False,
                              verbose=True,
                              create_overlap=False):
    """
    Compose the individual database networks into one background network.

    Loads the KEGG, HMDB, BioGrid, SIGNOR and Reactome FI networks, composes
    them, standardizes edge types, removes disconnected parts and writes the
    result to ``'<save_name>.p.gz'`` and ``'<save_name>.gml'``.

    Parameters
    ----------
    save_name : str
        Name of the network (prefix of the two output files)
    fresh_download : bool
        Download a fresh copy of the databases. Not forwarded to the
        Reactome loader.
    verbose: bool
        Print information about the databases
    create_overlap : bool
        Creates a figure comparing the databases. Pairwise node and edge
        overlap counts are drawn as two heatmaps and saved to
        ``'compare_network_dbs.png'``.

    Returns
    -------
    nx.DiGraph
    """

    kegg_network = db.load_all_of_kegg(fresh_download=fresh_download,
                                       verbose=verbose)
    hmdb_network = db.load_hmdb_network(fresh_download=fresh_download,
                                        verbose=verbose)
    biogrid_network = db.load_biogrid_network(fresh_download=fresh_download,
                                              verbose=verbose)
    signor_network = db.load_signor(fresh_download=fresh_download,
                                    verbose=verbose)
    reactome_network = db.load_reactome_fi(verbose=verbose)
    network_list = [
        hmdb_network, kegg_network, biogrid_network, reactome_network,
        signor_network
    ]
    names = ['hmdb', 'kegg', 'biogrid', 'reactome', 'signor']

    def find_overlap(n1, n2):
        """Print and return the (node, edge) overlap counts of two networks."""
        nodes1 = set(n1.nodes())
        nodes2 = set(n2.nodes())
        e1 = set(n1.edges())
        e2 = set(n2.edges())
        edge_overlap = len(e2.intersection(e1))
        node_overlap = len(nodes1.intersection(nodes2))
        print("\tnode overlap = {}".format(node_overlap))
        print("\tedge overlap = {}".format(edge_overlap))
        return node_overlap, edge_overlap

    if create_overlap:
        n_dbs = len(names)

        pal = sns.light_palette("purple", as_cmap=True)
        # Builtin `int` instead of the `np.int` alias, which NumPy removed.
        node_mat = np.zeros((n_dbs, n_dbs), dtype=int)
        edge_mat = np.zeros((n_dbs, n_dbs), dtype=int)

        # Overlap is symmetric: compute the upper triangle and mirror it.
        # The diagonal stays at zero.
        for i in range(n_dbs):
            for j in range(i + 1, n_dbs):
                n_overlap, e_overlap = find_overlap(network_list[i],
                                                    network_list[j])
                node_mat[i, j] = node_mat[j, i] = n_overlap
                edge_mat[i, j] = edge_mat[j, i] = e_overlap

        fig = plt.figure(figsize=(10, 4))
        # add_subplot makes the new axes current, so the pyplot/seaborn
        # calls that follow draw on it.
        fig.add_subplot(121)
        plt.title("Number of node overlaps")
        sns.heatmap(node_mat,
                    fmt='d',
                    annot=True,
                    linewidths=0.02,
                    cmap=pal,
                    yticklabels=names,
                    xticklabels=names)
        plt.yticks(rotation=0)
        fig.add_subplot(122)
        plt.title("Number of edge overlaps")
        sns.heatmap(edge_mat,
                    fmt='d',
                    annot=True,
                    linewidths=0.02,
                    cmap=pal,
                    yticklabels=names,
                    xticklabels=names)
        plt.tight_layout()
        plt.yticks(rotation=0)
        plt.subplots_adjust(wspace=.3)
        plt.savefig('compare_network_dbs.png', dpi=300, bbox_inches='tight')
        plt.close()

    full_network = nt.compose_all(network_list)

    nt.standardize_edge_types(full_network)
    full_network = nt.delete_disconnected_network(full_network)
    n_nodes = len(full_network.nodes)
    n_edges = len(full_network.edges)
    print("Background network {} nodes and {} edges".format(n_nodes, n_edges))

    # NOTE(review): nx.write_gpickle no longer exists in NetworkX 3.x --
    # confirm the pinned NetworkX version or switch to pickle + gzip.
    nx.write_gpickle(full_network, '{}.p.gz'.format(save_name))
    nx.write_gml(full_network, '{}.gml'.format(save_name))
    return full_network
예제 #4
0
def download_network_dbs():
    """
    Run the network database routines one after another.

    Calls ``nd.load_reactome_fi``, ``nd.download_signor`` and
    ``nd.load_biogrid_network`` and then instantiates ``dl.HMDB``. All
    return values are discarded.

    NOTE(review): presumably each call downloads/caches its database --
    confirm against the ``nd`` and ``dl`` modules.
    """
    fetchers = (
        nd.load_reactome_fi,
        nd.download_signor,
        nd.load_biogrid_network,
        dl.HMDB,
    )
    for fetch in fetchers:
        fetch()
예제 #5
0
def download_network_dbs():
    """
    Invoke each network database routine in a fixed order.

    In sequence: ``nd.load_reactome_fi()``, ``nd.download_signor()``,
    ``nd.load_biogrid_network()`` and ``dl.HMDB()``. Nothing is returned.

    NOTE(review): the name suggests these calls fetch the databases to
    local storage -- verify in the ``nd`` and ``dl`` modules.
    """
    # (module, attribute) pairs; each attribute is looked up right before
    # it is called, exactly as a plain sequence of calls would do.
    call_order = (
        (nd, 'load_reactome_fi'),
        (nd, 'download_signor'),
        (nd, 'load_biogrid_network'),
        (dl, 'HMDB'),
    )
    for module, attr_name in call_order:
        getattr(module, attr_name)()