## The 'ProteinDiseaseAssociationGraph' object has helper methods, but the
## underlying networkx graph can also be accessed directly once it is created:
print('Total nodes: %d' % len(proteinGraph.graph.nodes))

## Filter by the proteins we are interested in; this list comes from a DB
## adapter, but any set will do.
proteins = dbAdapter.loadTotalProteinList().protein_id
filterByProteins = set(proteins)

# Using .attach() adds edges from a DB as defined by the adapter. Building the
# graph this way gives a graph of data which can itself be saved, which avoids
# rebuilding it as we work on different diseases or perform analysis.
# Every load is also filtered by the proteins we care about; in this case that
# is our original list.
proteinGraph.attach(dbAdapter.loadPPI(filterByProteins))
proteinGraph.attach(dbAdapter.loadKegg(filterByProteins))
proteinGraph.attach(dbAdapter.loadReactome(filterByProteins))
proteinGraph.attach(dbAdapter.loadInterpro(filterByProteins))
proteinGraph.attach(dbAdapter.loadGo(filterByProteins))

# networkx provides an API for retrieving nodes from the graph.
# Here the distinguishing prefix of each node's string ID is used to pick out
# (and so count) the nodes of a given kind in the current graph.
# Non-string node IDs are skipped by the isinstance() guard.
keggNodes = [
    g for g in proteinGraph.graph.nodes
    if isinstance(g, str) and g.startswith("hsa")
]
# NOTE(review): the visible source is cut off partway through this
# comprehension; it is closed here on the assumption that no further
# conditions follow -- confirm against the full file.
reactome = [
    r for r in proteinGraph.graph.nodes
    if isinstance(r, str) and r.startswith("R-")
]
# Report the size of the graph before any edges are attached below.
# Arguments are passed to logging lazily so the message is only formatted
# when the record is actually emitted.
logging.info('Total nodes: %d; edges: %d', pdg.graph.order(), pdg.graph.size())

## Filter by proteins of interest; this list comes from a DB adapter, but any
## set will do.
proteins = dbad.loadTotalProteinList().protein_id
proteinSet = set(proteins)
logging.info('Protein set: %d', len(proteinSet))

# attach() adds edges loaded from the DB.
# Building the graph this way means it can be saved, avoiding the need to
# rebuild it for different diseases, models and analyses.
# Every load is also filtered by the proteins of interest; in this case that
# is our original list.
pdg.attach(dbad.loadPPI(proteinSet))
pdg.attach(dbad.loadKegg(proteinSet))
pdg.attach(dbad.loadReactome(proteinSet))
pdg.attach(dbad.loadGo(proteinSet))

# InterPro is loaded on a best-effort basis: a failure is logged and the
# script carries on with the graph built so far.
try:
    pdg.attach(dbad.loadInterpro(proteinSet))
except Exception as e:
    logging.error("InterPro failed to load: %s", e)

# TCRD only: (Would these add value?)
# Also best-effort; any failure here is logged rather than raised.
try:
    pdg.attach(dbad.loadOMIM(proteinSet))
except Exception as e:
    logging.error("OMIM failed to load: %s", e)

# try:
#     pdg.attach(dbad.loadPfam(proteinSet))
# except Exception as e: