# Source/localization overlap statistics for a single species.
#
# NOTE(review): this excerpt had been collapsed onto a single line starting
# with '#', which turned every statement below into comment text. The
# statement layout is restored here; nesting was inferred from the syntax.

# filter to species: -1: disabled, 0 (human), 1 (drosi), 2 (c.elegans), 3 (yeast)
species_filter = -1

# group of custom sources, currently set to protein-protein interaction databases
custom_source_group = [
    'MINT', 'IntAct', 'MIPS', 'DroID', 'MatrixDB',
    'BioGRID', 'HPRD', 'DIP', 'CCSB',
]

# all proteins from ANY source DB (not intersection, but union!!)
# SELECT COUNT(DISTINCT id) FROM Protein WHERE id IN (
#     SELECT proteinId FROM ProteinToDatabase
#     WHERE sourceDb IN ('MINT', 'IntAct', 'MIPS', 'MatrixDB', 'DroID',
#                        'BioGRID', 'HPRD', 'HomoMINT', 'DIP', 'CCSB')
# );
# NOTE(review): the query above also lists 'HomoMINT', which is not in
# custom_source_group -- confirm which of the two is intended.

c = ComppiInterface()
comppi_graph = c.buildGlobalComppi()

# filter to species if needed (a negative value leaves the graph unfiltered)
if species_filter >= 0:
    print("Filtering to species: {}".format(species_filter))
    comppi_graph = c.filterGraph(comppi_graph, None, species_filter)

comppi_number_of_all_nodes = comppi_graph.number_of_nodes()
print("Number of all nodes: {}".format(comppi_number_of_all_nodes))
print()

# collect the proteins per source databases and localizations per source databases;
# each dict starts with a single bucket mapped to an empty list
loc_sources = {'No Loc Source': []}
prot_sources = {'No Protein Source': []}
node_db_mapping = c.getNodeSourceDbs()

# NOTE(review): the excerpt is cut off right after the loop header below, so
# its body is not visible here. The header is left commented (as it was in
# the collapsed original) so this fragment stays valid Python; restore it
# together with its body.
# for n, d in comppi_graph.nodes_iter(data=True):
'int_score' ]) # interactions for n1, n2, e in egograph.edges_iter(data=True): # note: same cells as in header csvw.writerow([ n1, egograph.node[n1]['name'], n2, egograph.node[n2]['name'], e['weight'] ]) print("Filtering the egograph...") filtered_egograph = c.filterGraph(egograph, args.loc, 0) # graph, loc, species print("Filtered Egograph: {} nodes, {} edges".format( filtered_egograph.number_of_nodes(), filtered_egograph.number_of_edges() )) with open("egograph-filtered-n_{}-r_{}-l_{}.csv".format(args.node_id, args.radius, args.loc), "w") as fp: csvw = csv.writer(fp, delimiter="\t", quoting=csv.QUOTE_MINIMAL) # header csvw.writerow([ 'node_a_id', 'node_a_name', 'node_b_id', 'node_b_name', 'int_score'
] # all proteins from ANY source DB (not intersection, but union!!) # SELECT COUNT(DISTINCT id) FROM Protein WHERE id IN ( SELECT proteinId FROM ProteinToDatabase WHERE sourceDb IN ('MINT', 'IntAct', 'MIPS', 'MatrixDB', 'DroID', 'BioGRID', 'HPRD', 'HomoMINT', 'DIP', 'CCSB') ); c = ComppiInterface() for spfilter in species_filter: comppi_graph = c.buildGlobalComppi() with open("comppi_data_overlap_log_sp_{}.txt".format(spfilter), "w") as logfp: # filter to species if needed if spfilter >= 0: print("Filtering to species: {}".format(spfilter), file=logfp) comppi_graph = c.filterGraph(comppi_graph, None, spfilter) comppi_number_of_all_nodes = comppi_graph.number_of_nodes() print("Number of all nodes: {}".format(comppi_number_of_all_nodes), file=logfp) print("\n", file=logfp) # collect the proteins per source databases and localizations per source databases loc_sources = {'No Loc Source': []} prot_sources = {'No Protein Source': []} node_db_mapping = c.getNodeSourceDbs() for n, d in comppi_graph.nodes_iter(data=True): # nodes curr_node_dbs = node_db_mapping.get(n) if curr_node_dbs is None:
# Source/localization overlap statistics, one log file per species.
#
# NOTE(review): this excerpt had been collapsed onto a single line starting
# with '#', which turned every statement below into comment text. The
# statement layout is restored here; nesting was inferred from the syntax.

# group of custom sources, currently set to protein-protein interaction databases
custom_source_group = [
    'MINT', 'IntAct', 'MIPS', 'DroID', 'MatrixDB',
    'BioGRID', 'HPRD', 'DIP', 'CCSB',
]

# all proteins from ANY source DB (not intersection, but union!!)
# SELECT COUNT(DISTINCT id) FROM Protein WHERE id IN (
#     SELECT proteinId FROM ProteinToDatabase
#     WHERE sourceDb IN ('MINT', 'IntAct', 'MIPS', 'MatrixDB', 'DroID',
#                        'BioGRID', 'HPRD', 'HomoMINT', 'DIP', 'CCSB')
# );
# NOTE(review): the query above also lists 'HomoMINT', which is not in
# custom_source_group -- confirm which of the two is intended.

c = ComppiInterface()

# One pass (and one log file) per entry of species_filter, which is defined
# before this excerpt.
for spfilter in species_filter:
    # the global graph is rebuilt on every pass before any species filtering
    comppi_graph = c.buildGlobalComppi()

    with open("comppi_data_overlap_log_sp_{}.txt".format(spfilter), "w") as logfp:
        # filter to species if needed (a negative value leaves the graph unfiltered)
        if spfilter >= 0:
            print("Filtering to species: {}".format(spfilter), file=logfp)
            comppi_graph = c.filterGraph(comppi_graph, None, spfilter)

        comppi_number_of_all_nodes = comppi_graph.number_of_nodes()
        print("Number of all nodes: {}".format(comppi_number_of_all_nodes), file=logfp)
        print("\n", file=logfp)

        # collect the proteins per source databases and localizations per source databases;
        # each dict starts with a single bucket mapped to an empty list
        loc_sources = {'No Loc Source': []}
        prot_sources = {'No Protein Source': []}
        node_db_mapping = c.getNodeSourceDbs()

        # NOTE(review): the excerpt is cut off right after the loop header
        # below, so its body is not visible here. The header is left commented
        # (as it was in the collapsed original) so this fragment stays valid
        # Python; restore it together with its body.
        # for n, d in comppi_graph.nodes_iter(data=True):