import networkx as nx
import numpy as np
import sys
import scipy.stats
import os

from utils import yeast_name_resolver

# Group key -> list of labels belonging to that group.
# NOTE(review): the labels are presumably condition / time-point identifiers
# from the Chong dataset read in main() -- confirm against read_chong.
groups = {
    'wt1': ['WT1'],
    'wt2': ['WT2'],
    'wt3': ['WT3'],
    'hu': ['HU80', 'HU120', 'HU160'],
    'rap': [
        'RAP60', 'RAP140', 'RAP220', 'RAP300', 'RAP380',
        'RAP460', 'RAP540', 'RAP620', 'RAP700',
    ],
}

res = yeast_name_resolver.NameResolver()


def main(gpath):
    """Load the network at *gpath* and report its overlap with the Chong data.

    Reads a pickled networkx graph, reads the Chong table via ``read_chong``,
    allocates a zero-filled (nodes x features) matrix and prints how many
    graph nodes also occur in the table's ``'gene'`` column.

    Args:
        gpath: Path to a graph file readable by ``nx.read_gpickle``.
    """
    # NOTE(review): read_gpickle unpickles the file; only load trusted files.
    G = nx.read_gpickle(gpath)
    nodes = sorted(G.nodes())
    # Node name -> row index, following the sorted node order.
    node_ix = dict(zip(nodes, np.arange(len(nodes))))

    abd_df = read_chong('../data-sources/yeast/chong2015/mmc2.xls')
    # One column is excluded from the feature count -- presumably the 'gene'
    # identifier column; confirm against read_chong.
    num_features = len(abd_df.columns) - 1
    # NOTE(review): F and node_ix are not used in the visible part of this
    # function; they are kept because this looks like an excerpt.
    F = np.zeros((len(nodes), num_features))

    common_nodes = set(nodes) & set(abd_df['gene'])
    print("Common nodes between yeast net and chong dataset: %d" % len(common_nodes))
import numpy as np
import networkx as nx
import os
import sys
from pygosemsim import graph, download, annotation, term_set
import pandas as pd
import feature_preprocessing.pairwise_go_semsim
import utils.yeast_name_resolver as nr

myers2006path = "../data-sources/myers2006.csv"
res = nr.NameResolver()
gpath = "../generated-data/ppc_yeast"


def main():
    """Annotate the Myers 2006 GO term table and keep a filtered subset.

    Loads the pickled network at ``gpath``, loads the "go-basic" GO graph,
    reads the tab-separated table at ``myers2006path``, adds a ``namespace``
    column looked up from the GO graph (``None`` for ids not in the graph),
    prints the table, and then keeps only ``biological_process`` rows with
    more than 3 S. cerevisiae annotations.
    """
    # Disabled resource downloads, kept for reference:
    # download.obo("go-basic")
    # download.gaf("sgd")

    # NOTE(review): read_gpickle unpickles the file; only load trusted files.
    network = nx.read_gpickle(gpath)
    nodes = sorted(network.nodes())
    # Node name -> position in the sorted node list.
    # NOTE(review): not used in the visible part of this function.
    node_ix = {node: position for position, node in enumerate(nodes)}

    go_graph = graph.from_resource("go-basic")

    df = pd.read_csv(myers2006path, sep="\t")

    # Look up each GO id's namespace; ids missing from the GO graph get None.
    namespaces = []
    for go_id in df['GO ID']:
        if go_id in go_graph:
            namespaces.append(go_graph.nodes[go_id]['namespace'])
        else:
            namespaces.append(None)
    df['namespace'] = namespaces
    print(df)

    is_biological_process = df['namespace'] == 'biological_process'
    annotation_counts = df['# of S. cerevisiae annotations (direct and indirect)']
    df = df[is_biological_process & (annotation_counts > 3)]
def main(organism, output):
    """Build the interaction graph for *organism* and write it to disk.

    Every admissible gene becomes a node, each file in the module-level
    ``coprespfiles`` is fed through the organism's reader to populate the
    graph, the organism's ``genes_to_remove`` are dropped, and the result is
    written as a gpickle to *output* and as GML to ``output + '.gml'``.
    For yeast only, the graph is first restricted to its largest connected
    component.

    Args:
        organism: One of ``'yeast'``, ``'pombe'``, ``'human'`` or ``'dro'``.
        output: Path of the gpickle file; the GML file gets a ``.gml`` suffix.

    Raises:
        ValueError: If *organism* is not one of the supported values.
    """
    supported = ('yeast', 'pombe', 'human', 'dro')
    if organism not in supported:
        # Fail early; otherwise the per-organism settings below stay unbound
        # and the failure surfaces later as an unrelated name error.
        raise ValueError(
            "Unsupported organism %r; expected one of: %s"
            % (organism, ', '.join(supported)))

    reader = read_mitab_file

    if organism == 'yeast':
        genes_to_remove = ['yar062w yar062w', 'yir044c yir044c']
        resolver = yeast_name_resolver.NameResolver()
        admissible_genes = resolver.get_genes()
        gc_only = True

        def extractor(s):
            return yeast_extract_locus_tag(s, resolver, admissible_genes)

        taxid = 559292
    elif organism == 'pombe':
        genes_to_remove = []
        gene_names = '../data-sources/pombe/PomBase2UniProt.csv'
        gene_names_df = pd.read_csv(
            gene_names, sep='\t', header=None, names=['locus', 'common'])
        admissible_genes = {str(e).lower() for e in gene_names_df['locus']}
        gc_only = False

        def extractor(s):
            return pombe_extract_locus_tag(s, admissible_genes)

        taxid = 284812
    elif organism == 'human':
        genes_to_remove = []
        gene_names = '../data-sources/human/gene_names'
        gene_names_df = pd.read_csv(gene_names, sep='\t')
        admissible_genes = {
            str(e).lower() for e in gene_names_df['Approved symbol']}
        gc_only = False

        # NOTE(review): the human branch reuses the pombe extractor --
        # confirm this is intentional.
        def extractor(s):
            return pombe_extract_locus_tag(s, admissible_genes)

        taxid = 9606
    elif organism == "dro":
        genes_to_remove = []
        with open('../tmp/dro_gene_map.json', 'r') as f:
            en_fbgn = json.load(f)
        gc_only = False

        def extractor(s):
            return dro_extract_locus_tag(s, en_fbgn)

        admissible_genes = {e.lower() for e in en_fbgn.values()}
        taxid = 7227

        # This branch passes two explicit interactor column names to the
        # reader instead of relying on read_mitab_file's defaults.
        def reader(file_path, copres_G, extractor, taxid):
            return read_mitab_file(
                file_path, copres_G, extractor, taxid,
                '#ID Interactor A', 'ID Interactor B')

    copresp_G = nx.Graph()
    copresp_G.add_nodes_from(admissible_genes)

    for file_path in coprespfiles:
        print("Processing %s" % file_path)
        reader(file_path, copresp_G, extractor, taxid)

    copresp_G.remove_nodes_from(genes_to_remove)

    print("Full graph:")
    print(nx.info(copresp_G))

    if gc_only:
        # Only compute components when they are needed. max() returns the
        # first largest component, matching a stable descending sort by size.
        largest = max(nx.connected_components(copresp_G), key=len)
        copresp_G = copresp_G.subgraph(largest)

    # NOTE(review): reconstructed from collapsed source; this summary is
    # assumed to be printed whether or not the graph was restricted above.
    print(nx.info(copresp_G))

    # write network to disk
    nx.write_gpickle(copresp_G, output)
    nx.write_gml(copresp_G, output + '.gml')