コード例 #1
0
import networkx as nx 
import sys 
import scipy.stats
import os 

from utils import yeast_name_resolver

# Maps each group key to the list of sample labels that belong to it.
# The three 'wt' entries are single-label groups; 'hu' and 'rap' each bundle
# several labels (presumably time points of a treatment series — TODO confirm).
groups = {
    'wt1': ['WT1'],
    'wt2': ['WT2'],
    'wt3': ['WT3'],
    'hu': ['HU80', 'HU120', 'HU160'],
    'rap': [
        'RAP60', 'RAP140', 'RAP220',
        'RAP300', 'RAP380', 'RAP460',
        'RAP540', 'RAP620', 'RAP700',
    ],
}

# Module-level NameResolver instance, constructed once when the module loads.
res = yeast_name_resolver.NameResolver()

def main(gpath):
    """Load the pickled network and report its gene overlap with the Chong table.

    Reads the graph stored at ``gpath``, loads the Chong 2015 spreadsheet via
    ``read_chong``, allocates a zero-filled node-by-feature matrix and prints
    how many graph nodes also appear in the table's ``'gene'`` column.

    Args:
        gpath: Path of the gpickle file holding the network.
    """
    network = nx.read_gpickle(gpath)
    ordered_nodes = sorted(network.nodes())
    n_nodes = len(ordered_nodes)

    # Position of every node in the sorted node ordering.
    node_to_row = {
        name: row for name, row in zip(ordered_nodes, np.arange(n_nodes))
    }

    abundance = read_chong('../data-sources/yeast/chong2015/mmc2.xls')

    # Every table column but one counts as a feature
    # (presumably the excluded one is the 'gene' column — TODO confirm).
    feature_count = len(abundance.columns) - 1
    features = np.zeros((n_nodes, feature_count))

    shared = set(ordered_nodes).intersection(set(abundance['gene']))
    print("Common nodes between yeast net and chong dataset: %d" % len(shared))
import numpy as np 
import networkx as nx 
import os 
import sys 
from pygosemsim import graph, download, annotation, term_set
import pandas as pd 
import feature_preprocessing.pairwise_go_semsim
import utils.yeast_name_resolver as nr
# Location of the Myers 2006 table; main() reads it as a tab-separated file
# despite the .csv extension.
myers2006path = "../data-sources/myers2006.csv"

# Module-level NameResolver instance, constructed once when the module loads.
res = nr.NameResolver()


# Location of the pickled networkx graph that main() loads.
gpath = "../generated-data/ppc_yeast"
def main():
    """Annotate the Myers 2006 GO terms with their namespace and filter them.

    Loads the network stored at ``gpath`` and the "go-basic" ontology graph,
    reads the table at ``myers2006path``, adds a ``'namespace'`` column looked
    up from the ontology (``None`` for IDs the ontology does not contain),
    prints the table, and finally keeps only biological-process terms with
    more than 3 S. cerevisiae annotations.
    """
    # Fetching the ontology / annotation files is disabled; uncomment to
    # download them first.
    # download.obo("go-basic")
    # download.gaf("sgd")

    network = nx.read_gpickle(gpath)
    ordered_nodes = sorted(network.nodes())
    # Position of every node in the sorted node ordering.
    node_ix = {name: pos for pos, name in enumerate(ordered_nodes)}

    go_graph = graph.from_resource("go-basic")

    terms = pd.read_csv(myers2006path, sep="\t")

    # Look up each GO ID's namespace, falling back to None for unknown IDs.
    namespaces = []
    for go_id in terms['GO ID']:
        if go_id in go_graph:
            namespaces.append(go_graph.nodes[go_id]['namespace'])
        else:
            namespaces.append(None)
    terms['namespace'] = namespaces
    print(terms)

    is_bio_process = terms['namespace'] == 'biological_process'
    enough_annotations = (
        terms['# of S. cerevisiae annotations (direct and indirect)'] > 3
    )
    terms = terms[is_bio_process & enough_annotations]
コード例 #3
0
def main(organism, output):
    """Build the network for ``organism`` from ``coprespfiles`` and save it.

    Every admissible gene of the organism is added as a node, each path in
    the module-level ``coprespfiles`` is handed to the organism's MITAB
    reader to populate the graph, the organism's excluded nodes are removed,
    and the result is written to ``output`` (gpickle) and ``output + '.gml'``
    (GML).  For yeast only the largest connected component is kept.

    Args:
        organism: One of ``'yeast'``, ``'pombe'``, ``'human'`` or ``'dro'``.
        output: Path of the gpickle file to write; ``'.gml'`` is appended to
            it for the GML copy.

    Raises:
        ValueError: If ``organism`` is not one of the supported values.
    """
    # NOTE(review): nx.info / nx.write_gpickle are not available in
    # NetworkX >= 3.0 — confirm the pinned networkx version.
    supported = ('yeast', 'pombe', 'human', 'dro')
    if organism not in supported:
        # Fail up front with a clear message instead of hitting an unbound
        # local variable further down.
        raise ValueError("Unsupported organism %r; expected one of: %s" %
                         (organism, ', '.join(supported)))

    reader = read_mitab_file
    if organism == 'yeast':
        genes_to_remove = ['yar062w  yar062w', 'yir044c  yir044c']
        resolver = yeast_name_resolver.NameResolver()
        admissible_genes = resolver.get_genes()
        gc_only = True
        extractor = lambda s: yeast_extract_locus_tag(s, resolver,
                                                      admissible_genes)
        taxid = 559292

    elif organism == 'pombe':
        genes_to_remove = []
        gene_names = '../data-sources/pombe/PomBase2UniProt.csv'
        gene_names_df = pd.read_csv(gene_names,
                                    sep='\t',
                                    header=None,
                                    names=['locus', 'common'])
        admissible_genes = {str(e).lower() for e in gene_names_df['locus']}
        gc_only = False
        extractor = lambda s: pombe_extract_locus_tag(s, admissible_genes)
        taxid = 284812

    elif organism == 'human':
        genes_to_remove = []
        gene_names = '../data-sources/human/gene_names'
        gene_names_df = pd.read_csv(gene_names, sep='\t')
        admissible_genes = {
            str(e).lower() for e in gene_names_df['Approved symbol']
        }
        gc_only = False
        # NOTE(review): the human branch reuses the pombe extractor —
        # confirm this is intended.
        extractor = lambda s: pombe_extract_locus_tag(s, admissible_genes)
        taxid = 9606

    elif organism == "dro":
        genes_to_remove = []
        with open('../tmp/dro_gene_map.json', 'r') as f:
            en_fbgn = json.load(f)
        gc_only = False
        extractor = lambda s: dro_extract_locus_tag(s, en_fbgn)
        admissible_genes = {e.lower() for e in en_fbgn.values()}
        taxid = 7227
        # These files need the two interactor column names passed explicitly.
        reader = lambda file_path, copres_G, extractor, taxid: read_mitab_file(
            file_path, copres_G, extractor, taxid, '#ID Interactor A',
            'ID Interactor B')

    copresp_G = nx.Graph()
    copresp_G.add_nodes_from(admissible_genes)
    for file_path in coprespfiles:
        print("Processing %s" % file_path)
        reader(file_path, copresp_G, extractor, taxid)

    copresp_G.remove_nodes_from(genes_to_remove)

    print("Full graph:")
    print(nx.info(copresp_G))

    # Optionally restrict the network to its largest connected component.
    # max() picks the same component as sorting by size and taking the first
    # entry, without ordering all the others.
    if gc_only:
        giant_component = max(nx.connected_components(copresp_G), key=len)
        copresp_G = copresp_G.subgraph(giant_component)
    print(nx.info(copresp_G))

    # write network to disk
    nx.write_gpickle(copresp_G, output)
    nx.write_gml(copresp_G, output + '.gml')