Example #1
0
# Species to restrict the graph to. -1 disables filtering; otherwise
# 0 = human, 1 = Drosophila ("drosi"), 2 = C. elegans, 3 = yeast.
species_filter = -1

# Group of custom sources, currently set to protein-protein interaction databases.
custom_source_group = ['MINT', 'IntAct', 'MIPS', 'DroID', 'MatrixDB', 'BioGRID', 'HPRD', 'DIP', 'CCSB']
# All proteins from ANY source DB (the union, not the intersection). Reference query:
# SELECT COUNT(DISTINCT id) FROM Protein WHERE id IN ( SELECT proteinId FROM ProteinToDatabase WHERE sourceDb IN ('MINT', 'IntAct', 'MIPS', 'MatrixDB', 'DroID', 'BioGRID', 'HPRD', 'HomoMINT', 'DIP', 'CCSB') );
# NOTE(review): the query above also lists 'HomoMINT', which is not in
# custom_source_group -- confirm which of the two is intended.

# Build the unfiltered graph through the interface object.
c = ComppiInterface()
comppi_graph = c.buildGlobalComppi()

# Filter to a single species if needed; any non-negative species_filter enables it.
if species_filter >= 0:
	print("Filtering to species: {}".format(species_filter))
	# The second argument is passed as None here.
	# NOTE(review): presumably the localization filter, left disabled -- confirm against filterGraph.
	comppi_graph = c.filterGraph(comppi_graph, None, species_filter)

# Node count of the (possibly filtered) graph, reported on stdout followed by a blank line.
comppi_number_of_all_nodes = comppi_graph.number_of_nodes()
print("Number of all nodes: {}".format(comppi_number_of_all_nodes))
print()

# Collect the proteins per source database and the localizations per source database.
# Each dict starts with a single, empty catch-all bucket.
# NOTE(review): presumably filled by the node loop that follows -- its body is not visible here.
loc_sources = {
	'No Loc Source': []
}
prot_sources = {
	'No Protein Source': []
}
# NOTE(review): presumably maps a node id to its source databases -- verify against getNodeSourceDbs.
node_db_mapping = c.getNodeSourceDbs()

for n, d in comppi_graph.nodes_iter(data=True):
Example #2
0
		'int_score'
	])
	
	# interactions
	for n1, n2, e in egograph.edges_iter(data=True):
		# note: same cells as in header
		csvw.writerow([
			n1,
			egograph.node[n1]['name'],
			n2,
			egograph.node[n2]['name'],
			e['weight']
		])

# Narrow the egograph down with the interface's filter, then report the size of the result.
print("Filtering the egograph...")
filtered_egograph = c.filterGraph(egograph, args.loc, 0)  # positional order: graph, loc, species

# Take the counts first (nodes, then edges) so the report line stays short.
filtered_node_count = filtered_egograph.number_of_nodes()
filtered_edge_count = filtered_egograph.number_of_edges()
print("Filtered Egograph: {} nodes, {} edges".format(filtered_node_count, filtered_edge_count))

with open("egograph-filtered-n_{}-r_{}-l_{}.csv".format(args.node_id, args.radius, args.loc), "w") as fp:
	csvw = csv.writer(fp, delimiter="\t", quoting=csv.QUOTE_MINIMAL)
	
	# header
	csvw.writerow([
		'node_a_id',
		'node_a_name',
		'node_b_id',
		'node_b_name',
		'int_score'
Example #3
0
]
# All proteins from ANY source DB (the union, not the intersection). Reference query:
# SELECT COUNT(DISTINCT id) FROM Protein WHERE id IN ( SELECT proteinId FROM ProteinToDatabase WHERE sourceDb IN ('MINT', 'IntAct', 'MIPS', 'MatrixDB', 'DroID', 'BioGRID', 'HPRD', 'HomoMINT', 'DIP', 'CCSB') );

c = ComppiInterface()

for spfilter in species_filter:
    comppi_graph = c.buildGlobalComppi()

    with open("comppi_data_overlap_log_sp_{}.txt".format(spfilter),
              "w") as logfp:

        # filter to species if needed
        if spfilter >= 0:
            print("Filtering to species: {}".format(spfilter), file=logfp)
            comppi_graph = c.filterGraph(comppi_graph, None, spfilter)

        comppi_number_of_all_nodes = comppi_graph.number_of_nodes()
        print("Number of all nodes: {}".format(comppi_number_of_all_nodes),
              file=logfp)
        print("\n", file=logfp)

        # collect the proteins per source databases and localizations per source databases
        loc_sources = {'No Loc Source': []}
        prot_sources = {'No Protein Source': []}
        node_db_mapping = c.getNodeSourceDbs()

        for n, d in comppi_graph.nodes_iter(data=True):
            # nodes
            curr_node_dbs = node_db_mapping.get(n)
            if curr_node_dbs is None:
# Group of custom sources, currently set to protein-protein interaction databases.
custom_source_group = ['MINT', 'IntAct', 'MIPS', 'DroID', 'MatrixDB', 'BioGRID', 'HPRD', 'DIP', 'CCSB']
# All proteins from ANY source DB (the union, not the intersection). Reference query:
# SELECT COUNT(DISTINCT id) FROM Protein WHERE id IN ( SELECT proteinId FROM ProteinToDatabase WHERE sourceDb IN ('MINT', 'IntAct', 'MIPS', 'MatrixDB', 'DroID', 'BioGRID', 'HPRD', 'HomoMINT', 'DIP', 'CCSB') );
# NOTE(review): the query above also lists 'HomoMINT', which is not in
# custom_source_group -- confirm which of the two is intended.

# Interface object; the per-species loop below builds and filters graphs through it.
c = ComppiInterface()

for spfilter in species_filter:
	comppi_graph = c.buildGlobalComppi()
	
	with open("comppi_data_overlap_log_sp_{}.txt".format(spfilter), "w") as logfp:
	
		# filter to species if needed
		if spfilter >= 0:
			print("Filtering to species: {}".format(spfilter), file=logfp)
			comppi_graph = c.filterGraph(comppi_graph, None, spfilter)
		
		comppi_number_of_all_nodes = comppi_graph.number_of_nodes()
		print("Number of all nodes: {}".format(comppi_number_of_all_nodes), file=logfp)
		print("\n", file=logfp)
		
		# collect the proteins per source databases and localizations per source databases
		loc_sources = {
			'No Loc Source': []
		}
		prot_sources = {
			'No Protein Source': []
		}
		node_db_mapping = c.getNodeSourceDbs()
		
		for n, d in comppi_graph.nodes_iter(data=True):