def buildInteractions(species,Degrees,inputType,inputDir,outputdir,interactionDirs,Genes=None,
                      geneSetType=None,PathwayFilter=None,OntologyID=None,directory=None,expressionFile=None,
                      obligatorySet=None,secondarySet=None,IncludeExpIDs=False):
    """Resolve the query IDs, associate them with interactions and export a graph image.

    Resets the module-level state read by the helper functions, imports the
    Ensembl annotations, the interaction databases and the HMDB data, builds
    the set of query IDs, and passes the associated interactions to
    exportGraphImage.

    Parameters:
      species -- used to build the Ensembl annotation path and forwarded to
          the helper calls.
      Degrees -- stored in the global `degrees`; secondarySet is only applied
          when this equals 1 or 'direct'.
      inputType -- stored in the global `inputDataType`.
      inputDir -- primary query source handed to importqueryResults; also
          used as expressionFile when expressionFile is None.
      outputdir -- stored in the global `outputDir`.
      interactionDirs -- interaction database path(s). If the first element
          has length 1 (as happens when a bare path string is passed) the
          value is wrapped in a list.
      Genes -- passed to getGeneIDs for the initial query IDs; also the last
          fallback for the global `FileName`.
      geneSetType, OntologyID, directory -- forwarded unchanged to
          gene_associations.simpleGenePathwayImport.
      PathwayFilter -- a single pathway or a tuple/list of pathways. Drives
          the global `FileName` (joined with spaces, ':' replaced by '-',
          truncated to 40 characters) and the pathway gene import.
      expressionFile -- passed to exportGraphImage; when empty, one is created
          with exportSelectedIDs.
      obligatorySet -- stored in the global `obligatoryList` ([] when None).
      secondarySet -- stored in the global `secondDegreeObligatoryCategories`
          when Degrees is 1 or 'direct'.
      IncludeExpIDs -- when true (and an expression file is present), the IDs
          read from expressionFile are stored in the global
          `secondaryQueryIDs`.

    Returns:
      The value returned by exportGraphImage.

    The global `FileName` is left untouched when PathwayFilter, OntologyID
    and Genes are all None.
    """
    global degrees
    global outputDir
    global inputDataType
    global obligatoryList ### Add these if connected to anything
    global secondaryQueryIDs
    global secondDegreeObligatoryCategories ### Add if common to anything in the input - Indicates systems to apply this to
    global symbol_hmdb_db
    global hmdb_symbol_db
    global FileName
    global intNameShort

    ### Create an annotation database for HMDB IDs
    symbol_hmdb_db = {}
    hmdb_symbol_db = {}

    secondaryQueryIDs = {}
    degrees = Degrees
    outputDir = outputdir
    inputDataType = inputType
    obligatoryList = [] if obligatorySet is None else obligatorySet
    secondDegreeObligatoryCategories = []
    intNameShort = ''

    if expressionFile is None:
        expressionFile = inputDir ### If it doesn't contain expression values, view as yellow nodes
    if secondarySet is not None and (degrees == 1 or degrees == 'direct'): ### If degrees == 2, this is redundant
        ### This currently adds a lot of predictions - either make more stringent or currently exclude
        secondDegreeObligatoryCategories = secondarySet

    if PathwayFilter is not None:
        if isinstance(PathwayFilter, (tuple, list)):
            FileName = ' '.join(PathwayFilter).replace(':','-')
        else:
            FileName = PathwayFilter
        if len(FileName) > 40:
            FileName = FileName[:40]
    elif OntologyID is not None:
        FileName = OntologyID
    elif Genes is not None:
        FileName = Genes

    ### Import Ensembl-Symbol annotations
    getEnsemblGeneData('AltDatabase/ensembl/'+species+'/'+species+'_Ensembl-annotations.txt')

    ### A bare string indexes to a single character, so this detects a lone path
    if len(interactionDirs[0]) == 1:
        interactionDirs = [interactionDirs]

    ### Import interaction databases indicated in interactionDirs
    for interaction_path in interactionDirs:
        print(interaction_path)
        ### Take the segment after the first '-' in the file name and keep its first character
        db_name = export.findFilename(interaction_path).split('-')[1]
        intNameShort += db_name[0]
    importInteractionData(interactionDirs)
    getHMDBData(species) ### overwrite the symbol annotation from any HMDB that comes from a WikiPathway or KEGG pathway that we also include (for consistent official annotation)

    input_IDs = getGeneIDs(Genes)
    ### NOTE(review): a list PathwayFilter is joined like a tuple for FileName above,
    ### but only a tuple is iterated here - confirm whether lists should be iterated too
    try:
        if isinstance(PathwayFilter, tuple):
            ### Add the genes of every listed pathway to the IDs derived from Genes
            for pathway in PathwayFilter:
                pathway_IDs = gene_associations.simpleGenePathwayImport(species,geneSetType,pathway,OntologyID,directory)
                for gene_id in pathway_IDs:
                    input_IDs[gene_id] = None
        else:
            input_IDs = gene_associations.simpleGenePathwayImport(species,geneSetType,PathwayFilter,OntologyID,directory)
    except Exception:
        ### Best effort: on any failure keep input_IDs as it stood when the error occurred
        pass

    if expressionFile is None or len(expressionFile) == 0:
        expressionFile = exportSelectedIDs(input_IDs) ### create an expression file
    elif IncludeExpIDs: ### Prioritize selection of IDs for interactions WITH the primary query set (not among expression input IDs)
        secondaryQueryIDs = importqueryResults(species,expressionFile,{})[0]

    input_IDs,query_interactions,dir_file = importqueryResults(species,inputDir,input_IDs)
    sif_file,symbol_pair_unique = associateQueryGenesWithInteractions(input_IDs,query_interactions,dir_file)
    return exportGraphImage(species,sif_file,expressionFile)
def buildInteractions(species,Degrees,inputType,inputDir,outputdir,interactionDirs,Genes=None,
                      geneSetType=None,PathwayFilter=None,OntologyID=None,directory=None,expressionFile=None,
                      obligatorySet=None,secondarySet=None,IncludeExpIDs=False):
    """Resolve the query IDs, associate them with interactions and export a graph image.

    Resets the module-level state read by the helper functions, imports the
    Ensembl annotations, the interaction databases and the HMDB data, builds
    the set of query IDs, and passes the associated interactions to
    exportGraphImage.

    Parameters:
      species -- used to build the Ensembl annotation path and forwarded to
          the helper calls.
      Degrees -- stored in the global `degrees`; secondarySet is only applied
          when this equals 1 or 'direct'.
      inputType -- stored in the global `inputDataType`.
      inputDir -- primary query source handed to importqueryResults; also
          used as expressionFile when expressionFile is None.
      outputdir -- stored in the global `outputDir`.
      interactionDirs -- interaction database path(s). If the first element
          has length 1 (as happens when a bare path string is passed) the
          value is wrapped in a list.
      Genes -- passed to getGeneIDs for the initial query IDs; also the last
          fallback for the global `FileName`.
      geneSetType, OntologyID, directory -- forwarded unchanged to
          gene_associations.simpleGenePathwayImport.
      PathwayFilter -- a single pathway or a tuple/list of pathways. Drives
          the global `FileName` (joined with spaces, ':' replaced by '-',
          truncated to 40 characters) and the pathway gene import.
      expressionFile -- passed to exportGraphImage; when empty, one is created
          with exportSelectedIDs.
      obligatorySet -- stored in the global `obligatoryList` ([] when None).
      secondarySet -- stored in the global `secondDegreeObligatoryCategories`
          when Degrees is 1 or 'direct'.
      IncludeExpIDs -- when true (and an expression file is present), the IDs
          read from expressionFile are stored in the global
          `secondaryQueryIDs`.

    Returns:
      The value returned by exportGraphImage.

    The global `FileName` is left untouched when PathwayFilter, OntologyID
    and Genes are all None.
    """
    global degrees
    global outputDir
    global inputDataType
    global obligatoryList ### Add these if connected to anything
    global secondaryQueryIDs
    global secondDegreeObligatoryCategories ### Add if common to anything in the input - Indicates systems to apply this to
    global symbol_hmdb_db
    global hmdb_symbol_db
    global FileName
    global intNameShort

    ### Create an annotation database for HMDB IDs
    symbol_hmdb_db = {}
    hmdb_symbol_db = {}

    secondaryQueryIDs = {}
    degrees = Degrees
    outputDir = outputdir
    inputDataType = inputType
    obligatoryList = [] if obligatorySet is None else obligatorySet
    secondDegreeObligatoryCategories = []
    intNameShort = ''

    if expressionFile is None:
        expressionFile = inputDir ### If it doesn't contain expression values, view as yellow nodes
    if secondarySet is not None and (degrees == 1 or degrees == 'direct'): ### If degrees == 2, this is redundant
        ### This currently adds a lot of predictions - either make more stringent or currently exclude
        secondDegreeObligatoryCategories = secondarySet

    if PathwayFilter is not None:
        if isinstance(PathwayFilter, (tuple, list)):
            FileName = ' '.join(PathwayFilter).replace(':','-')
        else:
            FileName = PathwayFilter
        if len(FileName) > 40:
            FileName = FileName[:40]
    elif OntologyID is not None:
        FileName = OntologyID
    elif Genes is not None:
        FileName = Genes

    ### Import Ensembl-Symbol annotations
    getEnsemblGeneData('AltDatabase/ensembl/'+species+'/'+species+'_Ensembl-annotations.txt')

    ### A bare string indexes to a single character, so this detects a lone path
    if len(interactionDirs[0]) == 1:
        interactionDirs = [interactionDirs]

    ### Import interaction databases indicated in interactionDirs
    for interaction_path in interactionDirs:
        print(interaction_path)
        ### Take the segment after the first '-' in the file name and keep its first character
        db_name = export.findFilename(interaction_path).split('-')[1]
        intNameShort += db_name[0]
    importInteractionData(interactionDirs)
    getHMDBData(species) ### overwrite the symbol annotation from any HMDB that comes from a WikiPathway or KEGG pathway that we also include (for consistent official annotation)

    input_IDs = getGeneIDs(Genes)
    ### NOTE(review): a list PathwayFilter is joined like a tuple for FileName above,
    ### but only a tuple is iterated here - confirm whether lists should be iterated too
    try:
        if isinstance(PathwayFilter, tuple):
            ### Add the genes of every listed pathway to the IDs derived from Genes
            for pathway in PathwayFilter:
                pathway_IDs = gene_associations.simpleGenePathwayImport(species,geneSetType,pathway,OntologyID,directory)
                for gene_id in pathway_IDs:
                    input_IDs[gene_id] = None
        else:
            input_IDs = gene_associations.simpleGenePathwayImport(species,geneSetType,PathwayFilter,OntologyID,directory)
    except Exception:
        ### Best effort: on any failure keep input_IDs as it stood when the error occurred
        pass

    if expressionFile is None or len(expressionFile) == 0:
        expressionFile = exportSelectedIDs(input_IDs) ### create an expression file
    elif IncludeExpIDs: ### Prioritize selection of IDs for interactions WITH the primary query set (not among expression input IDs)
        secondaryQueryIDs = importqueryResults(species,expressionFile,{})[0]

    input_IDs,query_interactions,dir_file = importqueryResults(species,inputDir,input_IDs)
    sif_file,symbol_pair_unique = associateQueryGenesWithInteractions(input_IDs,query_interactions,dir_file)
    return exportGraphImage(species,sif_file,expressionFile)
def interactionPermuteTest(species,Degrees,inputType,inputDir,outputdir,interactionDirs,Genes=None,
                      geneSetType=None,PathwayFilter=None,OntologyID=None,directory=None,expressionFile=None,
                      obligatorySet=None,secondarySet=None,IncludeExpIDs=False,permutations=10000):
    """Print a permutation-based p-value for the number of unique interaction pairs.

    Computes the unique interaction pairs for the real query, then repeats the
    computation `permutations` times with a random sample of Ensembl IDs
    stored in the global `secondaryQueryIDs`. Prints the permuted counts
    (largest first), the original count, and the fraction of permuted counts
    that are greater than or equal to the original.

    Parameters:
      species -- used to build the Ensembl annotation path and forwarded to
          the helper calls.
      Degrees -- stored in the global `degrees`; secondarySet is only applied
          when this equals 1 or 'direct'.
      inputType -- stored in the global `inputDataType`.
      inputDir -- primary query source handed to importqueryResults; also
          used as expressionFile when expressionFile is None.
      outputdir -- stored in the global `outputDir`.
      interactionDirs -- passed to importInteractionData.
      Genes -- passed to getGeneIDs; also the last fallback for the global
          `FileName`.
      geneSetType, PathwayFilter, OntologyID, directory -- forwarded to
          gene_associations.simpleGenePathwayImport.
      expressionFile -- source of the initial `secondaryQueryIDs`.
      obligatorySet -- stored in the global `obligatoryList` ([] when None).
      secondarySet -- stored in the global `secondDegreeObligatoryCategories`
          when Degrees is 1 or 'direct'.
      IncludeExpIDs -- accepted for signature parity with buildInteractions;
          not read by this function.
      permutations -- number of random draws (default 10000); must be >= 1.

    Returns:
      None. Results are written to standard output.

    Raises:
      ValueError -- if permutations is smaller than 1.
    """
    global degrees
    global outputDir
    global inputDataType
    global obligatoryList ### Add these if connected to anything
    global secondaryQueryIDs
    global secondDegreeObligatoryCategories ### Add if common to anything in the input - Indicates systems to apply this to
    global symbol_hmdb_db
    global hmdb_symbol_db
    global FileName
    import random

    if permutations < 1:
        raise ValueError('permutations must be >= 1')

    ### Create an annotation database for HMDB IDs
    symbol_hmdb_db = {}
    hmdb_symbol_db = {}

    secondaryQueryIDs = {}
    degrees = Degrees
    outputDir = outputdir
    inputDataType = inputType
    obligatoryList = [] if obligatorySet is None else obligatorySet
    secondDegreeObligatoryCategories = []

    if expressionFile is None:
        expressionFile = inputDir ### If it doesn't contain expression values, view as yellow nodes
    if secondarySet is not None and (degrees == 1 or degrees == 'direct'): ### If degrees == 2, this is redundant
        ### This currently adds a lot of predictions - either make more stringent or currently exclude
        secondDegreeObligatoryCategories = secondarySet

    if PathwayFilter is not None:
        FileName = PathwayFilter
    elif OntologyID is not None:
        FileName = OntologyID
    elif Genes is not None:
        FileName = Genes

    ### Import Ensembl-Symbol annotations
    getEnsemblGeneData('AltDatabase/ensembl/'+species+'/'+species+'_Ensembl-annotations.txt')
    ### Import interaction databases indicated in interactionDirs
    importInteractionData(interactionDirs)
    getHMDBData(species) ### overwrite the symbol annotation from any HMDB that comes from a WikiPathway or KEGG pathway that we also include (for consistent official annotation)

    input_IDs = getGeneIDs(Genes)
    try:
        input_IDs = gene_associations.simpleGenePathwayImport(species,geneSetType,PathwayFilter,OntologyID,directory)
    except Exception:
        ### Best effort: on failure keep the IDs derived from Genes
        pass

    secondaryQueryIDs = importqueryResults(species,expressionFile,{})[0]
    input_IDs,query_interactions,dir_file = importqueryResults(species,inputDir,input_IDs)

    ### Get the number of unique genes
    sif_file, original_symbol_pair_unique = associateQueryGenesWithInteractions(input_IDs,query_interactions,dir_file)
    ensembl_unique = list(ensembl_symbol_db) ### sampling pool: the keys of the Ensembl annotation database

    interaction_lengths = []
    for _ in range(permutations):
        random_inputs = random.sample(ensembl_unique,len(input_IDs))
        secondaryQueryIDs = importqueryResults(species,random_inputs,{})[0]
        input_IDs,query_interactions,dir_file = importqueryResults(species,inputDir,input_IDs)
        ### NOTE(review): inputDir is passed here where the initial call above passes dir_file - confirm they are interchangeable
        sif_file, symbol_pair_unique = associateQueryGenesWithInteractions(input_IDs,query_interactions,inputDir)
        interaction_lengths.append(len(symbol_pair_unique))

    interaction_lengths.sort(reverse=True)
    original_length = len(original_symbol_pair_unique)
    ### Single-argument print calls produce the same output as the statement form
    print('permuted length distribution: %s' % (interaction_lengths,))
    print('original length: %s' % (original_length,))
    as_extreme = sum(1 for length in interaction_lengths if length >= original_length)
    print('p-value: %s' % (float(as_extreme)/float(permutations),))
def interactionPermuteTest(species,Degrees,inputType,inputDir,outputdir,interactionDirs,Genes=None,
                      geneSetType=None,PathwayFilter=None,OntologyID=None,directory=None,expressionFile=None,
                      obligatorySet=None,secondarySet=None,IncludeExpIDs=False,permutations=10000):
    """Print a permutation-based p-value for the number of unique interaction pairs.

    Computes the unique interaction pairs for the real query, then repeats the
    computation `permutations` times with a random sample of Ensembl IDs
    stored in the global `secondaryQueryIDs`. Prints the permuted counts
    (largest first), the original count, and the fraction of permuted counts
    that are greater than or equal to the original.

    Parameters:
      species -- used to build the Ensembl annotation path and forwarded to
          the helper calls.
      Degrees -- stored in the global `degrees`; secondarySet is only applied
          when this equals 1 or 'direct'.
      inputType -- stored in the global `inputDataType`.
      inputDir -- primary query source handed to importqueryResults; also
          used as expressionFile when expressionFile is None.
      outputdir -- stored in the global `outputDir`.
      interactionDirs -- passed to importInteractionData.
      Genes -- passed to getGeneIDs; also the last fallback for the global
          `FileName`.
      geneSetType, PathwayFilter, OntologyID, directory -- forwarded to
          gene_associations.simpleGenePathwayImport.
      expressionFile -- source of the initial `secondaryQueryIDs`.
      obligatorySet -- stored in the global `obligatoryList` ([] when None).
      secondarySet -- stored in the global `secondDegreeObligatoryCategories`
          when Degrees is 1 or 'direct'.
      IncludeExpIDs -- accepted for signature parity with buildInteractions;
          not read by this function.
      permutations -- number of random draws (default 10000); must be >= 1.

    Returns:
      None. Results are written to standard output.

    Raises:
      ValueError -- if permutations is smaller than 1.
    """
    global degrees
    global outputDir
    global inputDataType
    global obligatoryList ### Add these if connected to anything
    global secondaryQueryIDs
    global secondDegreeObligatoryCategories ### Add if common to anything in the input - Indicates systems to apply this to
    global symbol_hmdb_db
    global hmdb_symbol_db
    global FileName
    import random

    if permutations < 1:
        raise ValueError('permutations must be >= 1')

    ### Create an annotation database for HMDB IDs
    symbol_hmdb_db = {}
    hmdb_symbol_db = {}

    secondaryQueryIDs = {}
    degrees = Degrees
    outputDir = outputdir
    inputDataType = inputType
    obligatoryList = [] if obligatorySet is None else obligatorySet
    secondDegreeObligatoryCategories = []

    if expressionFile is None:
        expressionFile = inputDir ### If it doesn't contain expression values, view as yellow nodes
    if secondarySet is not None and (degrees == 1 or degrees == 'direct'): ### If degrees == 2, this is redundant
        ### This currently adds a lot of predictions - either make more stringent or currently exclude
        secondDegreeObligatoryCategories = secondarySet

    if PathwayFilter is not None:
        FileName = PathwayFilter
    elif OntologyID is not None:
        FileName = OntologyID
    elif Genes is not None:
        FileName = Genes

    ### Import Ensembl-Symbol annotations
    getEnsemblGeneData('AltDatabase/ensembl/'+species+'/'+species+'_Ensembl-annotations.txt')
    ### Import interaction databases indicated in interactionDirs
    importInteractionData(interactionDirs)
    getHMDBData(species) ### overwrite the symbol annotation from any HMDB that comes from a WikiPathway or KEGG pathway that we also include (for consistent official annotation)

    input_IDs = getGeneIDs(Genes)
    try:
        input_IDs = gene_associations.simpleGenePathwayImport(species,geneSetType,PathwayFilter,OntologyID,directory)
    except Exception:
        ### Best effort: on failure keep the IDs derived from Genes
        pass

    secondaryQueryIDs = importqueryResults(species,expressionFile,{})[0]
    input_IDs,query_interactions,dir_file = importqueryResults(species,inputDir,input_IDs)

    ### Get the number of unique genes
    sif_file, original_symbol_pair_unique = associateQueryGenesWithInteractions(input_IDs,query_interactions,dir_file)
    ensembl_unique = list(ensembl_symbol_db) ### sampling pool: the keys of the Ensembl annotation database

    interaction_lengths = []
    for _ in range(permutations):
        random_inputs = random.sample(ensembl_unique,len(input_IDs))
        secondaryQueryIDs = importqueryResults(species,random_inputs,{})[0]
        input_IDs,query_interactions,dir_file = importqueryResults(species,inputDir,input_IDs)
        ### NOTE(review): inputDir is passed here where the initial call above passes dir_file - confirm they are interchangeable
        sif_file, symbol_pair_unique = associateQueryGenesWithInteractions(input_IDs,query_interactions,inputDir)
        interaction_lengths.append(len(symbol_pair_unique))

    interaction_lengths.sort(reverse=True)
    original_length = len(original_symbol_pair_unique)
    ### Single-argument print calls produce the same output as the statement form
    print('permuted length distribution: %s' % (interaction_lengths,))
    print('original length: %s' % (original_length,))
    as_extreme = sum(1 for length in interaction_lengths if length >= original_length)
    print('p-value: %s' % (float(as_extreme)/float(permutations),))