def exportCurrentOntologyBuild(path_ontology_db, ontology_annotations, ontology_type, display=False):
    """Write the current ontology build to two tab-delimited files under OBO/builds.

    'built_<ontology_type>_paths.txt' gets one row per key of path_ontology_db
    (the key rendered by pathToString, followed by its ontology ID).
    '<ontology_type>_annotations.txt' gets the ID, OntologyTerm() and
    OntologyType() of every entry in ontology_annotations. Both paths are
    prefixed with 'AltDatabase/goelite/' when unique.whatProgramIsThis()
    reports 'AltAnalyze'.

    display is accepted for call compatibility only; it is never read here.

    NOTE(review): another definition with this same name appears later in this
    source; if both live in one module the later one is the one that is bound.
    """
    program_type, database_dir = unique.whatProgramIsThis()
    parent_dir = ''
    if program_type == 'AltAnalyze':
        parent_dir = 'AltDatabase/goelite/'

    ### --- ontology paths ---
    new_file = parent_dir + 'OBO/builds/built_' + ontology_type + '_paths.txt'
    try:
        fn = filepath(new_file)
        data = open(fn, 'w')
    except Exception:
        ### Re-create the builds directory if it was deleted, then retry once
        new_dir = parent_dir + 'OBO/builds'
        fn = filepath(new_dir)
        os.mkdir(fn)
        fn = filepath(new_file)
        data = open(fn, 'w')
    try:
        data.write('Path' + '\t' + 'ontology_id' + '\n')
        for path in path_ontology_db:
            ontology_id = path_ontology_db[path]
            data.write(pathToString(path) + '\t' + ontology_id + '\n')
    finally:
        data.close()  ### always release the handle, even if a row fails to serialize

    ### --- ontology annotations ---
    new_file = parent_dir + 'OBO/builds/' + ontology_type + '_annotations.txt'
    fn = filepath(new_file)
    data = open(fn, 'w')
    try:
        data.write('ontology_id' + '\t' + 'Ontology Name' + '\t' + 'Ontology Type' + '\n')
        for ontology_id in ontology_annotations:
            s = ontology_annotations[ontology_id]
            data.write(ontology_id + '\t' + s.OntologyTerm() + '\t' + s.OntologyType() + '\n')
    finally:
        data.close()
def importPreviousOntologyBuild(ontology_type, display=True): program_type, database_dir = unique.whatProgramIsThis() parent_dir = '' if program_type == 'AltAnalyze': parent_dir = 'AltDatabase/goelite/' if ontology_type == 'GeneOntology': ontology_type = 'go' filename = parent_dir + 'OBO/builds/built_' + ontology_type + '_paths.txt' fn = filepath(filename) x = 0 count = 0 for line in open(fn, 'r').xreadlines(): count += 1 original_increment = int(count / 10) increment = original_increment try: ### This reduces run-time for the typical analysis where the databases are in sync and up-to-date if run_mappfinder == 'yes': if verified_nested == 'no': build_nestedDB = 'yes' else: build_nestedDB = 'no' else: build_nestedDB = 'no' except Exception: build_nestedDB = 'yes' for line in open(fn, 'r').xreadlines(): if x == 0: x += 1 ###Skip the title line else: x += 1 if x == increment and display: increment += original_increment print '*', data = cleanUpLine(line) path, ontology_id = string.split(data, '\t') path = tuple(map(int, string.split(path, '.'))) #path = string.split(path_str,'.'); path = convertStrListToIntList(path); path = tuple(path) #s = OntologyPath(ontology_id,'','','',path,''); s = OntologyPathAbr(ontology_id,path) if ':' not in ontology_id: ontology_id = 'GO:' + ontology_id path_ontology_db[path] = ontology_id try: built_ontology_paths[ontology_id].append(path) except KeyError: built_ontology_paths[ontology_id] = [path] if build_nestedDB == 'yes': path_dictionary[path] = [path] ###All of the paths need to be added before if build_nestedDB == 'yes': if build_nestedDB == 'yes': for path in path_dictionary: ###Build nested Path-index path_len = len(path) i = -1 while path_len + i > 0: parent_path = path[:i] try: path_dictionary[parent_path].append(path) except Exception: null = [] i -= 1
def buildNestedOntologyTree(mappfinder): program_type,database_dir = unique.whatProgramIsThis(); parent_dir = '' if program_type == 'AltAnalyze': parent_dir = 'AltDatabase/goelite/' global run_mappfinder; run_mappfinder = mappfinder ###Import all the OBO Ontology tree information from http:/www.geneontology.org/ import_dir = '/'+parent_dir+'OBO'; global Ontology_version; path=[]; rank=0 c = GrabFiles(); c.setdirectory(import_dir) file_dirs = c.searchdirectory('.ontology') file_dirs += c.searchdirectory('.obo') file_dirs.reverse() x = file_dirs[1:]+file_dirs[0:1] ###Reorganize to mimic GenMAPP order start_time = time.time() ontology_type = '' #print file_dirs for file_dir in file_dirs: if '.obo' in file_dir or '.ontology' in file_dir: if 'gene_ontology' in file_dir or 'goslim' in file_dir: ontology_type = 'GeneOntology' if 'goslim' in file_dir: ontology_type = 'GOSlim' ###Import the 3 main Ontology files and index them so that the first path corresponds to the Ontology type - Software checks the date before parsing path_ontology_db,built_ontology_paths,ontology_annotations,path_dictionary,path,rank = importOBONew(file_dir,path,'biological_process',rank) try: path_ontology_db,built_ontology_paths,ontology_annotations,path_dictionary,path,rank = importOBONew(file_dir,path,'molecular_function',rank) except Exception: null=[] ### Sometimes missing from GO-Slim path_ontology_db,built_ontology_paths,ontology_annotations,path_dictionary,path,rank = importOBONew(file_dir,path,'cellular_component',rank) else: ontology_type = getOntologyType(file_dir) path_ontology_db,built_ontology_paths,ontology_annotations,path_dictionary,path,rank = importOBONew(file_dir,path,'',rank) deleteNestedOntologyFiles(ontology_type) ### Necessary to trigger an update for all species else: print 'The ontology format present in',file_dir,'is no longer supported.' 
exportCurrentOntologyBuild(path_ontology_db,ontology_annotations,ontology_type) end_time = time.time(); time_diff = int(end_time-start_time) print "Ontology categories imported and nested in %d seconds" % time_diff
def getHMDBData(species):
    """Import HMDB metabolite records for a species and register gene-metabolite links.

    Reads '<database_dir>/<species>/gene/HMDB.txt' (first line is a header).
    For each row the HMDB ID and symbol are stored in symbol_hmdb_db and
    hmdb_symbol_db, and for every comma-separated protein name in the last
    column that resolves through symbol_ensembl_db, a 'Metabolic' interaction
    is recorded in interaction_annotation_dbase and, in both directions, in
    interaction_db.
    """
    program_type, database_dir = unique.whatProgramIsThis()
    filename = database_dir + '/' + species + '/gene/HMDB.txt'
    fn = filepath(filename)
    header_pending = True
    handle = open(fn, 'rU')
    try:
        for line in handle:
            data = cleanUpLine(line)
            if header_pending:
                header_pending = False
                continue
            t = data.split('\t')
            try:
                hmdb_id, symbol, description, secondary_id, iupac, cas_number, chebi_id, pubchem_compound_id, Pathways, ProteinNames = t
            except ValueError:
                ### Bad Tab introduced from HMDB - fall back to the columns we actually need
                if len(t) < 2:
                    continue  ### blank or truncated row: no ID/symbol pair to record
                hmdb_id = t[0]
                symbol = t[1]
                ProteinNames = t[-1]
            symbol_hmdb_db[symbol] = hmdb_id
            hmdb_symbol_db[hmdb_id] = symbol
            ### Add gene-metabolite interactions to databases
            for protein_name in ProteinNames.split(','):
                try:
                    for ensembl in symbol_ensembl_db[protein_name]:
                        z = InteractionInformation(hmdb_id, ensembl, 'HMDB', 'Metabolic')
                        interaction_annotation_dbase[ensembl, hmdb_id] = z  ### This is the interaction direction that is appropriate
                        ### weight of 1 (weights currently not-supported)
                        try:
                            interaction_db[hmdb_id][ensembl] = 1
                        except KeyError:
                            interaction_db[hmdb_id] = {ensembl: 1}
                        try:
                            interaction_db[ensembl][hmdb_id] = 1
                        except KeyError:
                            interaction_db[ensembl] = {hmdb_id: 1}
                except Exception:
                    ### Deliberately best-effort: protein names without a gene mapping are skipped
                    pass
    finally:
        handle.close()
def speciesData():
    """Load 'Config/species.txt' into the module-level species tables.

    Each line is '<abbreviation>\\t<species name>'. After the call,
    species_list holds the species names in file order and species_codes maps
    species name -> abbreviation. Both globals are reset on every call.
    """
    global species_list
    global species_codes
    program_type, database_dir = unique.whatProgramIsThis()
    filename = 'Config/species.txt'
    fn = filepath(filename)
    species_list = []
    species_codes = {}
    handle = open(fn, 'r')
    try:
        for line in handle:
            data = cleanUpLine(line)
            if not data:
                continue  ### tolerate blank lines instead of failing on the unpack below
            abrev, species = data.split('\t')
            species_list.append(species)
            species_codes[species] = abrev
    finally:
        handle.close()
def deleteNestedOntologyFiles(ontology_type):
    """Delete the nested association files of one ontology type for every species.

    Each entry returned by unique.read_directory for the database directory is
    treated as a species folder; files in its 'nested' sub-folder whose name
    matches '-<ontology_type>' ('GeneOntology' is looked up as 'GO') are
    removed. Removal failures are ignored.
    """
    program_type, database_dir = unique.whatProgramIsThis()
    species_folders = unique.read_directory('/' + database_dir)
    ### Nested files carry the 'GO' tag for the Gene Ontology
    if ontology_type == 'GeneOntology':
        ontology_type = 'GO'
    for species_folder in species_folders:
        finder = GrabFiles()
        finder.setdirectory('/' + database_dir + '/' + species_folder + '/nested')
        ### list all nested files referencing the Ontology type
        for nested_file in finder.searchdirectory('-' + ontology_type):
            target = database_dir + '/' + species_folder + '/nested/' + nested_file
            try:
                os.remove(filepath(target))
            except Exception:
                pass
def findAvailableOntologies(species, mod_types):
    """Return the distinct ontology types that have gene-go files for a species.

    Looks in '<database_dir>/<species>/gene-go' for files matching '<mod>-' for
    each entry of mod_types. The ontology type is whatever follows the last '-'
    in the file name, minus its final four characters (the '.txt' extension).
    Duplicates are removed with unique.unique.
    """
    program_type, database_dir = unique.whatProgramIsThis()
    finder = GrabFiles()
    finder.setdirectory('/' + database_dir + '/' + species + '/gene-go')
    matches = []
    for mod in mod_types:
        matches += finder.searchdirectory(mod + '-')
    ### remove the .txt
    ontology_names = [name.split('-')[-1][:-4] for name in matches]
    return unique.unique(ontology_names)
def moveOntologyToArchiveDir(display=True): ### Move any existing OBO files to an archived directory as to not combine new with old annotations program_type,database_dir = unique.whatProgramIsThis(); parent_dir = '' if program_type == 'AltAnalyze': parent_dir = 'AltDatabase/goelite/' c = GrabFiles() c.setdirectory('/'+parent_dir+'OBO') file_dirs = c.searchdirectory('.ontology')+c.searchdirectory('.obo') for file_dir in file_dirs: new_file_dir = string.replace(file_dir,parent_dir+'OBO/',parent_dir+'OBO/archive/') if display: print 'Moving:',file_dir,'to:',new_file_dir export.customFileMove(file_dir,new_file_dir)
def exportVersionData(version, version_date, dir):
    """Record a database version in '<dir>version.txt' (used by the module UI).

    The directory prefix is 'AltDatabase/goelite/' for AltAnalyze, empty when
    dir contains 'OBO' or 'Config', and the program's database directory
    otherwise. The module-level current_version and current_version_date are
    updated before the file is written as '<version>\\t<version_date>'.
    """
    global current_version
    global current_version_date
    program_type, database_dir = unique.whatProgramIsThis()
    if program_type == 'AltAnalyze':
        prefix = 'AltDatabase/goelite/'
    elif 'OBO' in dir or 'Config' in dir:
        prefix = ''
    else:
        prefix = database_dir
    target_dir = prefix + dir
    current_version = version
    current_version_date = version_date
    version_file = export.ExportFile(target_dir + 'version.txt')
    version_file.write(str(version) + '\t' + str(version_date) + '\n')
    version_file.close()
def importVersionData(dir):
    """Read '<dir>version.txt' and return the version date it records.

    The file is looked up under 'AltDatabase/goelite/' for AltAnalyze. Each
    line is '<version>\\t<date>'; the date of the last line is stored in the
    module-level OBO_date and returned.

    NOTE(review): if the file has no lines, OBO_date keeps whatever value it
    already had (or is undefined) - unchanged from the previous behaviour.
    """
    global OBO_date
    program_type, database_dir = unique.whatProgramIsThis()
    parent_dir = ''
    if program_type == 'AltAnalyze':
        parent_dir = 'AltDatabase/goelite/'
    dir = parent_dir + dir
    filename = dir + 'version.txt'
    fn = filepath(filename)
    handle = open(fn, 'r')
    try:
        for line in handle:
            data = cleanUpLine(line)
            OBO_version, OBO_date = data.split('\t')
    finally:
        handle.close()  ### do not leave the version file open
    return OBO_date
def exportOntologyRelationships(nested_ontology_gene,gene_to_source_id,mod,source_type,ontology_type): program_type,database_dir = unique.whatProgramIsThis() if ontology_type == 'GeneOntology': ontology_type = 'GO' new_file = database_dir+'/'+species_code+'/nested/'+mod+'_to_Nested-'+ontology_type+'.txt' data = export.ExportFile(new_file) title = [mod,'ontology_id']; title_str = string.join(title,'\t') data.write(title_str+'\n') for ontology_id in nested_ontology_gene: for gene in nested_ontology_gene[ontology_id]: output_list = [gene,ontology_id] output_str = string.join(output_list,'\t') data.write(output_str+'\n') data.close() print new_file, 'saved to disk'
def moveOntologyToArchiveDir(display=True): ### Move any existing OBO files to an archived directory as to not combine new with old annotations program_type, database_dir = unique.whatProgramIsThis() parent_dir = '' if program_type == 'AltAnalyze': parent_dir = 'AltDatabase/goelite/' c = GrabFiles() c.setdirectory('/' + parent_dir + 'OBO') file_dirs = c.searchdirectory('.ontology') + c.searchdirectory('.obo') for file_dir in file_dirs: new_file_dir = string.replace(file_dir, parent_dir + 'OBO/', parent_dir + 'OBO/archive/') if display: print 'Moving:', file_dir, 'to:', new_file_dir export.customFileMove(file_dir, new_file_dir)
def sourceData():
    """Load 'Config/source_data.txt' into the module-level ID-system tables.

    Each tab-delimited line is '<source>[\\t<system code>[\\t<flag>]]'.
    After the call:
      mod_types    - sources whose third column is 'MOD'
      source_types - every source read while it was not in mod_types
      system_codes - system code -> source ('NuLL' when the code column is absent)
    All three globals are reset on every call.
    """
    global source_types
    global system_codes
    global mod_types
    program_type, database_dir = unique.whatProgramIsThis()
    filename = 'Config/source_data.txt'
    fn = filepath(filename)
    source_types = []
    system_codes = {}
    mod_types = []
    handle = open(fn, 'rU')
    try:
        for line in handle:
            data = cleanUpLine(line)
            t = data.split('\t')
            source = t[0]
            try:
                system_code = t[1]
            except IndexError:
                system_code = 'NuLL'
            ### A third column means this ID system is a potential MOD
            if len(t) > 2 and t[2] == 'MOD':
                mod_types.append(source)
            if source not in mod_types:
                source_types.append(source)
            system_codes[system_code] = source  ###Used when users include system code data in their input file
    finally:
        handle.close()
def verifyNestedFileCreation(species, mod_types, ontology_type):
    """Report whether every MOD with ontology associations also has a nested file.

    A MOD counts as present when verifyFileLength reports more than one line
    for '<database_dir>/<species>/gene-go/<mod>-<ontology_type>.txt'. Returns
    'yes' only when at least one MOD is present and each present MOD also has
    a '<mod>_to_Nested-<type>.txt' file with more than one line in the species'
    'nested' folder ('GeneOntology' is looked up as 'GO' there); otherwise 'no'.
    """
    program_type, database_dir = unique.whatProgramIsThis()

    ### Determine which mods are present for Ontology
    mods_present = []
    for mod in mod_types:
        association_file = database_dir + '/' + species + '/gene-go/' + mod + '-' + ontology_type + '.txt'
        if verifyFileLength(association_file) > 1:
            mods_present.append(mod)
    if len(mods_present) == 0:
        return 'no'

    if ontology_type == 'GeneOntology':
        ontology_type = 'GO'
    nested_found = 0
    for mod in mods_present:
        nested_file = database_dir + '/' + species + '/nested/' + mod + '_to_Nested-' + ontology_type + '.txt'
        if verifyFileLength(nested_file) > 1:
            nested_found += 1
    if nested_found == len(mods_present):
        return 'yes'
    return 'no'
def importPreviousOntologyAnnotations(target_ontology_type):
    """Load a previously exported annotation table and return it as a dictionary.

    Reads 'OBO/builds/<type>_annotations.txt' ('GeneOntology' maps to 'go';
    prefixed with 'AltDatabase/goelite/' for AltAnalyze), skipping the title
    line. Each remaining row is '<id>\\t<name>\\t<type>'. IDs without a ':' get
    a 'GO:' prefix and one leading space is removed from the name.

    Returns a dict mapping ontology ID -> OntologyTree(id, name, type).
    """
    ontology_annotations = {}
    program_type, database_dir = unique.whatProgramIsThis()
    parent_dir = ''
    if program_type == 'AltAnalyze':
        parent_dir = 'AltDatabase/goelite/'
    if target_ontology_type == 'GeneOntology':
        target_ontology_type = 'go'
    filename = parent_dir + 'OBO/builds/' + target_ontology_type + '_annotations.txt'
    fn = filepath(filename)
    title_pending = True
    handle = open(fn, 'r')
    try:
        for line in handle:
            if title_pending:
                title_pending = False  ###Skip the title line
                continue
            data = cleanUpLine(line)
            ontology_id, ontology_name, ontology_type = data.split('\t')
            if ':' not in ontology_id:
                ontology_id = 'GO:' + ontology_id
            ### startswith() is safe on an empty name, unlike indexing [0]
            if ontology_name.startswith(' '):
                ontology_name = ontology_name[1:]
            ontology_annotations[ontology_id] = OntologyTree(ontology_id, ontology_name, ontology_type)
    finally:
        handle.close()
    return ontology_annotations
def importPreviousOntologyBuild(ontology_type,display=True): program_type,database_dir = unique.whatProgramIsThis(); parent_dir = '' if program_type == 'AltAnalyze': parent_dir = 'AltDatabase/goelite/' if ontology_type == 'GeneOntology': ontology_type = 'go' filename = parent_dir+'OBO/builds/built_'+ontology_type+'_paths.txt'; fn=filepath(filename); x=0; count=0 for line in open(fn,'r').xreadlines(): count+=1 original_increment = int(count/10); increment = original_increment try: ### This reduces run-time for the typical analysis where the databases are in sync and up-to-date if run_mappfinder == 'yes': if verified_nested == 'no': build_nestedDB='yes' else: build_nestedDB = 'no' else: build_nestedDB = 'no' except Exception: build_nestedDB = 'yes' for line in open(fn,'r').xreadlines(): if x==0: x+=1 ###Skip the title line else: x+=1 if x == increment and display: increment+=original_increment; print '*', data = cleanUpLine(line) path,ontology_id = string.split(data,'\t') path = tuple(map(int,string.split(path,'.'))) #path = string.split(path_str,'.'); path = convertStrListToIntList(path); path = tuple(path) #s = OntologyPath(ontology_id,'','','',path,''); s = OntologyPathAbr(ontology_id,path) if ':' not in ontology_id: ontology_id = 'GO:'+ontology_id path_ontology_db[path] = ontology_id try: built_ontology_paths[ontology_id].append(path) except KeyError: built_ontology_paths[ontology_id] = [path] if build_nestedDB == 'yes': path_dictionary[path]=[path] ###All of the paths need to be added before if build_nestedDB == 'yes': if build_nestedDB == 'yes': for path in path_dictionary: ###Build nested Path-index path_len = len(path); i=-1 while path_len+i > 0: parent_path = path[:i] try: path_dictionary[parent_path].append(path) except Exception: null=[] i-=1
def exportCurrentOntologyBuild(path_ontology_db, ontology_annotations, ontology_type, display=False):
    """Write the current ontology build to two tab-delimited files under OBO/builds.

    'built_<ontology_type>_paths.txt' gets one row per key of path_ontology_db
    (the key rendered by pathToString, followed by its ontology ID).
    '<ontology_type>_annotations.txt' gets the ID, OntologyTerm() and
    OntologyType() of every entry in ontology_annotations. Both paths are
    prefixed with 'AltDatabase/goelite/' when unique.whatProgramIsThis()
    reports 'AltAnalyze'.

    display is accepted for call compatibility only; it is never read here.
    """
    program_type, database_dir = unique.whatProgramIsThis()
    parent_dir = ''
    if program_type == 'AltAnalyze':
        parent_dir = 'AltDatabase/goelite/'

    ### --- ontology paths ---
    new_file = parent_dir + 'OBO/builds/built_' + ontology_type + '_paths.txt'
    try:
        fn = filepath(new_file)
        data = open(fn, 'w')
    except Exception:
        new_dir = parent_dir + 'OBO/builds'
        fn = filepath(new_dir)
        os.mkdir(fn)  ###Re-Create directory if deleted
        fn = filepath(new_file)
        data = open(fn, 'w')
    try:
        data.write('Path' + '\t' + 'ontology_id' + '\n')
        for path in path_ontology_db:
            ontology_id = path_ontology_db[path]
            data.write(pathToString(path) + '\t' + ontology_id + '\n')
    finally:
        data.close()  ### close even when a write fails so the handle is not leaked

    ### --- ontology annotations ---
    new_file = parent_dir + 'OBO/builds/' + ontology_type + '_annotations.txt'
    fn = filepath(new_file)
    data = open(fn, 'w')
    try:
        data.write('ontology_id' + '\t' + 'Ontology Name' + '\t' + 'Ontology Type' + '\n')
        for ontology_id in ontology_annotations:
            s = ontology_annotations[ontology_id]
            data.write(ontology_id + '\t' + s.OntologyTerm() + '\t' + s.OntologyType() + '\n')
    finally:
        data.close()
def buildNestedOntologyTree(mappfinder,display=True): program_type,database_dir = unique.whatProgramIsThis(); parent_dir = '' if program_type == 'AltAnalyze': parent_dir = 'AltDatabase/goelite/' global run_mappfinder; run_mappfinder = mappfinder ###Import all the OBO Ontology tree information from http:/www.geneontology.org/ import_dir = '/'+parent_dir+'OBO'; global Ontology_version; path=[]; rank=0 c = GrabFiles(); c.setdirectory(import_dir) file_dirs = c.searchdirectory('.ontology') file_dirs += c.searchdirectory('.obo') file_dirs.reverse() x = file_dirs[1:]+file_dirs[0:1] ###Reorganize to mimic GenMAPP order start_time = time.time() ontology_type = '' #print file_dirs for file_dir in file_dirs: try: if '.obo' in file_dir or '.ontology' in file_dir: if 'gene_ontology' in file_dir or 'goslim' in file_dir: ontology_type = 'GeneOntology' if 'goslim' in file_dir: ontology_type = 'GOSlim' ###Import the 3 main Ontology files and index them so that the first path corresponds to the Ontology type - Software checks the date before parsing path_ontology_db,built_ontology_paths,ontology_annotations,path_dictionary,path,rank = importOBONew(file_dir,path,'biological_process',rank) try: path_ontology_db,built_ontology_paths,ontology_annotations,path_dictionary,path,rank = importOBONew(file_dir,path,'molecular_function',rank) except Exception: null=[] ### Sometimes missing from GO-Slim path_ontology_db,built_ontology_paths,ontology_annotations,path_dictionary,path,rank = importOBONew(file_dir,path,'cellular_component',rank) else: ontology_type = getOntologyType(file_dir) path_ontology_db,built_ontology_paths,ontology_annotations,path_dictionary,path,rank = importOBONew(file_dir,path,'',rank) deleteNestedOntologyFiles(ontology_type) ### Necessary to trigger an update for all species else: if display: print 'The ontology format present in',file_dir,'is no longer supported.' 
exportCurrentOntologyBuild(path_ontology_db,ontology_annotations,ontology_type,display=display) except Exception: pass ### If an Ontology file fails download, it still may create an empty file that will screw up the processing of other obo files - just skip it end_time = time.time(); time_diff = int(end_time-start_time) if display: print "Ontology categories imported and nested in %d seconds" % time_diff
def buildAccessoryPathwayDatabases(selected_species,additional_resources,force): global database_dir global program_type program_type,database_dir = unique.whatProgramIsThis() buildInferrenceTables(selected_species) ### Make sure these tables are present first!!! #print 'Attempting to update:', string.join(additional_resources,',') if 'KEGG' in additional_resources: try: importKEGGAssociations(selected_species,force) except Exception: print 'KEGG import failed (cause unknown)' if 'Transcription Factor Targets' in additional_resources: try: importTranscriptionTargetAssociations(selected_species,force) except Exception: print 'Transcription Factor Targets import failed (cause unknown)' if 'Phenotype Ontology' in additional_resources: try: importPhenotypeOntologyData(selected_species,force) except Exception: print 'Phenotype Ontology import failed (cause unknown)' if 'Disease Ontology' in additional_resources: try: importDiseaseOntologyAssociations(selected_species,force) except Exception: print 'Disease Ontology import failed (cause unknown)' if 'GOSlim' in additional_resources: try: importGOSlimAssociations(selected_species,force) except Exception: print 'GOSlim import failed (cause unknown)' if 'miRNA Targets' in additional_resources: try: importMiRAssociations(selected_species,force) except Exception: print 'miRNA Targets import failed (cause unknown)' if 'BioMarkers' in additional_resources: try: importBioMarkerAssociations(selected_species,force) except Exception: print 'BioMarkers import failed (cause unknown)'#,traceback.format_exc() if 'Domains' in additional_resources: try: importDomainAssociations(selected_species,force) except Exception: print 'Domains import failed (cause unknown)' if 'PathwayCommons' in additional_resources: try: importPathwayCommons(selected_species,force) except Exception: print 'PathwayCommons import failed (cause unknown)' if 'Latest WikiPathways' in additional_resources: try: importWikiPathways(selected_species,force) except 
Exception: print 'WikiPathways import failed (cause unknown)'
def generateMAPPFinderScores(species_title, species_id, source, mod_db, system_Codes, permute, resources_to_analyze, file_dirs, parent_root): global mappfinder_output_dir global custom_sets_folder global root root = parent_root global mapp_to_mod_genes global ontology_to_mod_genes global system_codes system_codes = system_Codes criterion_input_folder, criterion_denom_folder, output_dir, custom_sets_folder = file_dirs previous_denominator_file_dir = '' ontology_to_mod_genes = {} mapp_to_mod_genes = {} global test test = 'no' program_type, database_dir = unique.whatProgramIsThis() if resources_to_analyze == 'Gene Ontology': resources_to_analyze = 'GeneOntology' if len(output_dir) == 0: mappfinder_output_dir = 'input/MAPPFinder' else: mappfinder_output_dir = output_dir + '/GO-Elite_results/CompleteResults/ORA' global source_data source_data = source global mod mod = mod_db global species_code species_code = species_id global species_name species_name = species_title global gene_to_mapp global permutations permutations = permute global eliminate_redundant_genes eliminate_redundant_genes = 'yes' global permuted_z_scores global ontology_annotations global original_ontology_z_score_data global original_mapp_z_score_data global input_gene_list global denominator_gene_list global gene_file global denom_file_status global input_count global denom_count global gene_annotations global source_to_gene global use_FET if permutations == "FisherExactTest": use_FET = 'yes' ### Use Fisher's Exact test instead of permutation-based p-values permutations = 0 else: use_FET = 'no' start_time = time.time() gene_annotations = gene_associations.importGeneData(species_code, mod) OBO_date = importVersionData('OBO/') if len(criterion_input_folder) == 0: import_dir = '/input/GenesToQuery/' + species_code import_dir_alt = import_dir[1:] else: import_dir = criterion_input_folder import_dir_alt = criterion_input_folder m = GrabFiles() m.setdirectory(import_dir) try: dir_list = readDirText( 
import_dir ) #send a sub_directory to a function to identify all files in a directory except Exception: print_out = 'Warning! Input directory location is not a valid folder. Exiting GO-Elite.' ForceCriticalError(print_out) try: denom_dir_list = readDirText(criterion_denom_folder) except Exception: print_out = 'Warning! Denominator directory location is not a valid folder. Exiting GO-Elite.' ForceCriticalError(print_out) if len(dir_list) == 0: error_message = 'No files with the extension ".txt" found in the input directory.' ForceCriticalError(error_message) if len(denom_dir_list) == 0: error_message = 'No files with the extension ".txt" found in the denominator directory.' ForceCriticalError(error_message) inputs_analyzed = 0 for mappfinder_input in dir_list: #loop through each file in the directory permuted_z_scores = {} original_ontology_z_score_data = {} original_mapp_z_score_data = {} print 'Performing over-representation analysis (ORA) on', mappfinder_input gene_file_dir, gene_file = m.searchdirectory(mappfinder_input) ###Import Input gene/source-id lists input_gene_list, source_data_input, error_message = gene_associations.importUIDsForMAPPFinderQuery( import_dir_alt + '/' + gene_file, system_codes, 'no') input_count = len(input_gene_list) if 'WARNING!!!' in error_message: ### Warn the user about SwissProt issues when importing the denominator ForceCriticalError(error_message) if len(criterion_denom_folder) == 0: denom_folder = '/input/GenesToQuery/' + species_code + '/DenominatorGenes' else: denom_folder = criterion_denom_folder error_warning = "\nThe directory\n" + '[' + denom_folder + ']' + "\nwas not found. Please create the directory\nand place an appropriate denominator file\nor files in it." 
denominator_file_dir = identifyGeneFiles( denom_folder, gene_file) ###input is in input\Genes, denominator in try: denominator_file_dir = identifyGeneFiles( denom_folder, gene_file) ###input is in input\Genes, denominator in denominator_file = string.split(denominator_file_dir, '/')[-1] print 'Using:', denominator_file, 'for the denominator.' except Exception: print_out = "WARNING: No denominator file included in\nthe Denominator directory.\nTo proceed, place all denominator\nIDs in a file in that directory." ForceCriticalError(print_out) if denominator_file_dir == previous_denominator_file_dir: denom_file_status = 'old' else: denom_file_status = 'new' if denom_file_status == 'new': previous_denominator_file_dir = denominator_file_dir denominator_gene_list, source_data_denom, error_message = gene_associations.importUIDsForMAPPFinderQuery( denominator_file_dir, system_codes, 'no') denom_count = len(denominator_gene_list) if 'SwissProt' in error_message and 'WARNING!!!' not in error_message: if len(input_gene_list) == 0: error_message += '\nNo valid input IDs found. Exiting GO-Elite.' try: UI.WarningWindow( error_message, 'Warning!!! Identifier Error' ) ### Only warn, don't force an exit (if SwissProt full IDs are present) except Exception: None sys.exit() else: try: UI.WarningWindow( error_message, 'Warning!!! Identifier Error' ) ### Only warn, don't force an exit (if SwissProt full IDs are present) except Exception: None elif len(error_message) > 0: ForceCriticalError(error_message) if len(denominator_gene_list) == len(input_gene_list): print_out = 'Input and Denominator lists have identical counts.\nPlease load a propper denominator set (containing\nthe input list with all assayed gene IDs) before proceeding.' 
ForceCriticalError(print_out) original_denominator_gene_list = [] for id in denominator_gene_list: original_denominator_gene_list.append( id ) ###need this to be a valid list not dictionary for permutation analysis if len(source_data_input) > 0: source_data = source_data_input ###over-ride source_data if a source was identified from the input file if source_data != mod: if denom_file_status == 'new': mod_source = mod + '-' + source_data + '.txt' #checkDenominatorMatchesInput(input_gene_list,denominator_gene_list,gene_file) ###This is checked for the source IDs not associated MOD IDs try: gene_to_source_id = gene_associations.getGeneToUid( species_code, mod_source) print mod_source, 'imported' except Exception: try: if mod == 'EntrezGene': mod = 'Ensembl' else: mod = 'EntrezGene' print 'The primary system (MOD) has been switched from', mod_db, 'to', mod, '\n(' + mod_db, 'not supported for the %s ID system).' % source_data mod_source = mod + '-' + source_data + '.txt' gene_to_source_id = gene_associations.getGeneToUid( species_code, mod_source) except Exception: print_out = "WARNING: The primary gene ID system '" + mod + "'\ndoes not support relationships with '" + source_data + "'.\nRe-run using a supported primary ID system." 
ForceCriticalError(print_out) source_to_gene = OBO_import.swapKeyValues(gene_to_source_id) denominator_gene_list = associateInputSourceWithGene( source_to_gene, denominator_gene_list) ### Introduced the below method in version 1.21 to improve permutation speed (no longer need to search all source IDs) ### Only includes source ID to gene relationships represented in the denominator file (needed for Affymetrix) source_to_gene = OBO_import.swapKeyValues( denominator_gene_list) ###Replace input lists with corresponding MOD IDs input_gene_list = associateInputSourceWithGene( source_to_gene, input_gene_list) checkDenominatorMatchesInput( input_gene_list, denominator_gene_list, gene_file) ###This is for only the associated MOD IDs gd = GrabFiles() gd.setdirectory('/' + database_dir + '/' + species_code + '/gene-mapp') available_genesets = reorganizeResourceList(gd.getAllFiles(mod)) od = GrabFiles() od.setdirectory('/' + database_dir + '/' + species_code + '/gene-go') available_ontologies = reorganizeResourceList(od.getAllFiles(mod)) input_gene_count = len( input_gene_list ) ###Count number of genes associated with source input IDs if len(input_gene_list) == 0 or len(denominator_gene_list) == 0: if len(input_gene_list) == 0: print_out = 'WARNING!!!! None of the input IDs provided map to genes for ' + mappfinder_input + '. Check to make sure the selected species is correct.' print_out += '\nSelected species: ' + species_name print_out += '\nInput ID system: ' + str(source_data_input) print_out += '\nPrimary ID system (MOD): ' + str(mod) ForceCriticalError(print_out) if len(denominator_gene_list) == 0: print_out = 'WARNING!!!! None of the denominator IDs provided map to genes for ' + denominator_file_dir + '. Check to make sure the selected species is correct.' 
print_out += '\nSelected species: ' + species_name print_out += '\nDenominator ID system: ' + str(source) print_out += '\nPrimary ID system (MOD):' + str(mod) ForceCriticalError(print_out) elif len(available_ontologies) == 0 and len(available_genesets) == 0: print_out = 'WARNING!!!! No Ontology or GeneSets appear to be available for this species. Please supply and re-analyze.' ForceCriticalError(print_out) else: """ Perform permutation analysis and ORA on available GeneSets or Ontologies""" inputs_analyzed += 1 global permute_inputs permute_inputs = [] if permutations != 0 or use_FET == 'no': buildPermutationDatabase(original_denominator_gene_list, input_count) run_status = 0 ### Analyzed ontologies if len(available_ontologies) > 0: print ' Analyzing input ID list with available ontologies' for ontology_dir in available_ontologies: ontology_type = getResourceType(ontology_dir) permuted_z_scores = {} original_ontology_z_score_data = {} #print ontology_type, resources_to_analyze if resources_to_analyze == ontology_type or resources_to_analyze == 'all': ontology_annotations = importOntologyAnnotations( species_code, ontology_type) if ontology_annotations != None: ### Occurs when the files are named or formatted correctly status, ontology_to_mod_genes = performOntologyORA( ontology_dir) run_status += status ### Analyzed gene-sets if len(available_genesets) > 0: print ' Analyzing input ID list with available gene-sets' for geneset_dir in available_genesets: geneset_type = getResourceType(geneset_dir) permuted_z_scores = {} original_mapp_z_score_data = {} if resources_to_analyze == geneset_type or resources_to_analyze == 'all': status, mapp_to_mod_genes = performGeneSetORA(geneset_dir) run_status += status if len(custom_sets_folder) > 0: ### Hence - Analyze User Supplied GeneSets permuted_z_scores = {} original_mapp_z_score_data = {} run_status += performGeneSetORA('UserSuppliedAssociations')[0] permute_inputs = [] permute_mapp_inputs = [] ontology_input_gene_count = [] 
mapp_input_gene_count = [] if run_status == 0: ### Returns the number of successfully analyzed gene-set databases program_type, database_dir = unique.whatProgramIsThis() print_out = "Warning!!! Either the MOD you have selected: " + mod + "\nis missing the appropriate relationshipfiles necessary to run GO-Elite\nor you have selected an invalid resource to analyze. Either replace\nthe missing MOD files in " + database_dir + '/' + species_code + ' sub-directories or\nselect a different MOD at run-time.' ForceCriticalError(print_out) end_time = time.time() time_diff = formatTime(start_time, end_time) print 'ORA analyses finished in %s seconds' % time_diff return ontology_to_mod_genes, mapp_to_mod_genes ###Return the MOD genes associated with each GO term and MAPP