Example #1
0
def exportCurrentOntologyBuild(path_ontology_db,
                               ontology_annotations,
                               ontology_type,
                               display=False):
    """Write the current ontology build to two tab-delimited text files.

    Creates 'OBO/builds/built_<ontology_type>_paths.txt' (path, ontology ID)
    and 'OBO/builds/<ontology_type>_annotations.txt' (ontology ID, term name,
    term type). Both paths are prefixed with 'AltDatabase/goelite/' when
    unique.whatProgramIsThis() reports 'AltAnalyze'.

    path_ontology_db: mapping of path keys (rendered with pathToString) to
        ontology ID strings.
    ontology_annotations: mapping of ontology ID to an object providing
        OntologyTerm() and OntologyType().
    ontology_type: string embedded in both output file names.
    display: not used by this function; kept for interface compatibility.
    """
    program_type, database_dir = unique.whatProgramIsThis()
    parent_dir = ''
    if program_type == 'AltAnalyze': parent_dir = 'AltDatabase/goelite/'
    new_file = parent_dir + 'OBO/builds/built_' + ontology_type + '_paths.txt'
    try:
        data = open(filepath(new_file), 'w')
    except Exception:
        new_dir = parent_dir + 'OBO/builds'
        os.mkdir(filepath(new_dir))  ###Re-Create directory if deleted
        data = open(filepath(new_file), 'w')
    try:  ### Guarantee the handle is released even if a write fails
        data.write('Path' + '\t' + 'ontology_id' + '\n')
        for path in path_ontology_db:
            ontology_id = path_ontology_db[path]
            path_str = pathToString(path)
            data.write(path_str + '\t' + ontology_id + '\n')
    finally:
        data.close()

    new_file = parent_dir + 'OBO/builds/' + ontology_type + '_annotations.txt'
    data = open(filepath(new_file), 'w')
    try:
        data.write('ontology_id' + '\t' + 'Ontology Name' + '\t' +
                   'Ontology Type' + '\n')
        for ontology_id in ontology_annotations:
            s = ontology_annotations[ontology_id]
            data.write(ontology_id + '\t' + s.OntologyTerm() + '\t' +
                       s.OntologyType() + '\n')
    finally:
        data.close()
Example #2
0
def importPreviousOntologyBuild(ontology_type, display=True):
    program_type, database_dir = unique.whatProgramIsThis()
    parent_dir = ''
    if program_type == 'AltAnalyze': parent_dir = 'AltDatabase/goelite/'
    if ontology_type == 'GeneOntology': ontology_type = 'go'
    filename = parent_dir + 'OBO/builds/built_' + ontology_type + '_paths.txt'
    fn = filepath(filename)
    x = 0
    count = 0

    for line in open(fn, 'r').xreadlines():
        count += 1
    original_increment = int(count / 10)
    increment = original_increment

    try:  ### This reduces run-time for the typical analysis where the databases are in sync and up-to-date
        if run_mappfinder == 'yes':
            if verified_nested == 'no':
                build_nestedDB = 'yes'
            else:
                build_nestedDB = 'no'
        else:
            build_nestedDB = 'no'
    except Exception:
        build_nestedDB = 'yes'

    for line in open(fn, 'r').xreadlines():
        if x == 0: x += 1  ###Skip the title line
        else:
            x += 1
            if x == increment and display:
                increment += original_increment
                print '*',
            data = cleanUpLine(line)
            path, ontology_id = string.split(data, '\t')
            path = tuple(map(int, string.split(path, '.')))
            #path = string.split(path_str,'.'); path = convertStrListToIntList(path); path = tuple(path)
            #s = OntologyPath(ontology_id,'','','',path,''); s = OntologyPathAbr(ontology_id,path)
            if ':' not in ontology_id: ontology_id = 'GO:' + ontology_id
            path_ontology_db[path] = ontology_id
            try:
                built_ontology_paths[ontology_id].append(path)
            except KeyError:
                built_ontology_paths[ontology_id] = [path]
            if build_nestedDB == 'yes':
                path_dictionary[path] = [path]
    ###All of the paths need to be added before
    if build_nestedDB == 'yes':
        if build_nestedDB == 'yes':
            for path in path_dictionary:
                ###Build nested Path-index
                path_len = len(path)
                i = -1
                while path_len + i > 0:
                    parent_path = path[:i]
                    try:
                        path_dictionary[parent_path].append(path)
                    except Exception:
                        null = []
                    i -= 1
Example #3
0
def buildNestedOntologyTree(mappfinder):
    program_type,database_dir = unique.whatProgramIsThis(); parent_dir = ''
    if program_type == 'AltAnalyze': parent_dir = 'AltDatabase/goelite/'
    
    global run_mappfinder; run_mappfinder = mappfinder
    ###Import all the OBO Ontology tree information from http:/www.geneontology.org/
    import_dir = '/'+parent_dir+'OBO'; global Ontology_version; path=[]; rank=0
    c = GrabFiles(); c.setdirectory(import_dir)
    file_dirs = c.searchdirectory('.ontology')
    file_dirs += c.searchdirectory('.obo')
    file_dirs.reverse()
    x = file_dirs[1:]+file_dirs[0:1] ###Reorganize to mimic GenMAPP order
    start_time = time.time()
    ontology_type = ''
    #print file_dirs
    for file_dir in file_dirs:
        if '.obo' in file_dir or '.ontology' in file_dir:
            if 'gene_ontology' in file_dir or 'goslim' in file_dir:
                ontology_type = 'GeneOntology'
                if 'goslim' in file_dir: ontology_type = 'GOSlim'
                ###Import the 3 main Ontology files and index them so that the first path corresponds to the Ontology type - Software checks the date before parsing
                path_ontology_db,built_ontology_paths,ontology_annotations,path_dictionary,path,rank = importOBONew(file_dir,path,'biological_process',rank)
                try: path_ontology_db,built_ontology_paths,ontology_annotations,path_dictionary,path,rank = importOBONew(file_dir,path,'molecular_function',rank)
                except Exception: null=[] ### Sometimes missing from GO-Slim
                path_ontology_db,built_ontology_paths,ontology_annotations,path_dictionary,path,rank = importOBONew(file_dir,path,'cellular_component',rank)
            else:
                ontology_type = getOntologyType(file_dir)
                path_ontology_db,built_ontology_paths,ontology_annotations,path_dictionary,path,rank = importOBONew(file_dir,path,'',rank)
            deleteNestedOntologyFiles(ontology_type) ### Necessary to trigger an update for all species
        else:
            print 'The ontology format present in',file_dir,'is no longer supported.'
        exportCurrentOntologyBuild(path_ontology_db,ontology_annotations,ontology_type)
    end_time = time.time(); time_diff = int(end_time-start_time)
    
    print "Ontology categories imported and nested in %d seconds" % time_diff
Example #4
0
def getHMDBData(species):
    """Import HMDB metabolite records and their gene interactions.

    Reads '<database_dir>/<species>/gene/HMDB.txt', skipping the first
    (header) line. For every record the symbol<->HMDB ID lookups are
    updated, and each listed protein name found in symbol_ensembl_db is
    recorded as a bidirectional HMDB/Ensembl interaction. All databases
    written to (symbol_hmdb_db, hmdb_symbol_db, interaction_db,
    interaction_annotation_dbase) are defined outside this function.

    species: species directory name used to locate the file.
    """
    program_type,database_dir = unique.whatProgramIsThis()
    filename = database_dir+'/'+species+'/gene/HMDB.txt'

    fn=filepath(filename)
    header_skipped = False
    with open(fn,'rU') as hmdb_file:
        for line in hmdb_file:
            data = cleanUpLine(line)
            if not header_skipped:
                header_skipped = True
                continue
            t = data.split('\t')
            try: hmdb_id,symbol,description,secondary_id,iupac,cas_number,chebi_id,pubchem_compound_id,Pathways,ProteinNames = t
            except ValueError:
                ### Bad Tab introduced from HMDB - fall back to the columns whose position is still known
                hmdb_id = t[0]; symbol = t[1]; ProteinNames = t[-1]
            symbol_hmdb_db[symbol]=hmdb_id
            hmdb_symbol_db[hmdb_id] = symbol

            ### Add gene-metabolite interactions to databases
            for protein_name in ProteinNames.split(','):
                try:
                    for ensembl in symbol_ensembl_db[protein_name]:
                        z = InteractionInformation(hmdb_id,ensembl,'HMDB','Metabolic')
                        interaction_annotation_dbase[ensembl,hmdb_id] = z ### This is the interaction direction that is appropriate
                        try: interaction_db[hmdb_id][ensembl]=1
                        except KeyError: interaction_db[hmdb_id] = {ensembl:1} ###weight of 1 (weights currently not-supported)
                        try: interaction_db[ensembl][hmdb_id]=1
                        except KeyError: interaction_db[ensembl] = {hmdb_id:1} ###weight of 1 (weights currently not-supported)
                except Exception: pass ### Best-effort: e.g. protein name absent from symbol_ensembl_db
Example #5
0
def getHMDBData(species):
    """Import HMDB metabolite records and their gene interactions.

    Reads '<database_dir>/<species>/gene/HMDB.txt', skipping the first
    (header) line. For every record the symbol<->HMDB ID lookups are
    updated, and each listed protein name found in symbol_ensembl_db is
    recorded as a bidirectional HMDB/Ensembl interaction. All databases
    written to (symbol_hmdb_db, hmdb_symbol_db, interaction_db,
    interaction_annotation_dbase) are defined outside this function.

    species: species directory name used to locate the file.
    """
    program_type,database_dir = unique.whatProgramIsThis()
    filename = database_dir+'/'+species+'/gene/HMDB.txt'

    fn=filepath(filename)
    header_skipped = False
    with open(fn,'rU') as hmdb_file:
        for line in hmdb_file:
            data = cleanUpLine(line)
            if not header_skipped:
                header_skipped = True
                continue
            t = data.split('\t')
            try: hmdb_id,symbol,description,secondary_id,iupac,cas_number,chebi_id,pubchem_compound_id,Pathways,ProteinNames = t
            except ValueError:
                ### Bad Tab introduced from HMDB - fall back to the columns whose position is still known
                hmdb_id = t[0]; symbol = t[1]; ProteinNames = t[-1]
            symbol_hmdb_db[symbol]=hmdb_id
            hmdb_symbol_db[hmdb_id] = symbol

            ### Add gene-metabolite interactions to databases
            for protein_name in ProteinNames.split(','):
                try:
                    for ensembl in symbol_ensembl_db[protein_name]:
                        z = InteractionInformation(hmdb_id,ensembl,'HMDB','Metabolic')
                        interaction_annotation_dbase[ensembl,hmdb_id] = z ### This is the interaction direction that is appropriate
                        try: interaction_db[hmdb_id][ensembl]=1
                        except KeyError: interaction_db[hmdb_id] = {ensembl:1} ###weight of 1 (weights currently not-supported)
                        try: interaction_db[ensembl][hmdb_id]=1
                        except KeyError: interaction_db[ensembl] = {hmdb_id:1} ###weight of 1 (weights currently not-supported)
                except Exception: pass ### Best-effort: e.g. protein name absent from symbol_ensembl_db
Example #6
0
def speciesData():
    """Load 'Config/species.txt' into the module-level species tables.

    Every line must hold two tab-separated fields, abbreviation then species
    name. Resets and fills the globals species_list (names in file order)
    and species_codes (name -> abbreviation).
    """
    global species_list; global species_codes
    program_type,database_dir = unique.whatProgramIsThis()
    fn=filepath('Config/species.txt')
    species_list=[]; species_codes={}
    ### Stream the file and close it deterministically instead of leaking the handle
    with open(fn,'r') as species_file:
        for line in species_file:
            data = cleanUpLine(line)
            abrev,species = data.split('\t')
            species_list.append(species)
            species_codes[species] = abrev
Example #7
0
def speciesData():
    """Load 'Config/species.txt' into the module-level species tables.

    Every line must hold two tab-separated fields, abbreviation then species
    name. Resets and fills the globals species_list (names in file order)
    and species_codes (name -> abbreviation).
    """
    global species_list; global species_codes
    program_type,database_dir = unique.whatProgramIsThis()
    fn=filepath('Config/species.txt')
    species_list=[]; species_codes={}
    ### Stream the file and close it deterministically instead of leaking the handle
    with open(fn,'r') as species_file:
        for line in species_file:
            data = cleanUpLine(line)
            abrev,species = data.split('\t')
            species_list.append(species)
            species_codes[species] = abrev
Example #8
0
def deleteNestedOntologyFiles(ontology_type):
    """Delete the nested files matching an ontology type for every species.

    For each entry of the database directory, the files in its 'nested'
    sub-directory that match '-<ontology_type>' are removed ('GeneOntology'
    is searched for as 'GO'). Removal failures are ignored.
    """
    program_type,database_dir = unique.whatProgramIsThis()
    species_dirs = unique.read_directory('/'+database_dir)
    search_type = 'GO' if ontology_type == 'GeneOntology' else ontology_type
    for species_dir in species_dirs:
        nested_dir = database_dir+'/'+species_dir+'/nested'
        finder = GrabFiles()
        finder.setdirectory('/'+nested_dir)
        ### list all nested files referencing the Ontology type
        for nested_file in finder.searchdirectory('-'+search_type):
            try:
                os.remove(filepath(nested_dir+'/'+nested_file))
            except Exception:
                pass
Example #9
0
def deleteNestedOntologyFiles(ontology_type):
    """Delete the nested files matching an ontology type for every species.

    For each entry of the database directory, the files in its 'nested'
    sub-directory that match '-<ontology_type>' are removed ('GeneOntology'
    is searched for as 'GO'). Removal failures are ignored.
    """
    program_type,database_dir = unique.whatProgramIsThis()
    species_dirs = unique.read_directory('/'+database_dir)
    search_type = 'GO' if ontology_type == 'GeneOntology' else ontology_type
    for species_dir in species_dirs:
        nested_dir = database_dir+'/'+species_dir+'/nested'
        finder = GrabFiles()
        finder.setdirectory('/'+nested_dir)
        ### list all nested files referencing the Ontology type
        for nested_file in finder.searchdirectory('-'+search_type):
            try:
                os.remove(filepath(nested_dir+'/'+nested_file))
            except Exception:
                pass
Example #10
0
def findAvailableOntologies(species,mod_types):
    """Return the de-duplicated ontology types found in a species' gene-go files.

    species: species directory name under the database directory.
    mod_types: iterable of MOD names; files matching '<mod>-' are inspected.
    """
    program_type,database_dir = unique.whatProgramIsThis()
    finder = GrabFiles()
    finder.setdirectory('/'+database_dir+'/'+species+'/gene-go')
    matches = []
    for mod in mod_types:
        matches += finder.searchdirectory(mod+'-')
    ### The ontology type is the text after the last '-', minus the 4-character extension (.txt)
    ontology_types = [name.split('-')[-1][:-4] for name in matches]
    return unique.unique(ontology_types)
Example #11
0
def findAvailableOntologies(species,mod_types):
    """Return the de-duplicated ontology types found in a species' gene-go files.

    species: species directory name under the database directory.
    mod_types: iterable of MOD names; files matching '<mod>-' are inspected.
    """
    program_type,database_dir = unique.whatProgramIsThis()
    finder = GrabFiles()
    finder.setdirectory('/'+database_dir+'/'+species+'/gene-go')
    matches = []
    for mod in mod_types:
        matches += finder.searchdirectory(mod+'-')
    ### The ontology type is the text after the last '-', minus the 4-character extension (.txt)
    ontology_types = [name.split('-')[-1][:-4] for name in matches]
    return unique.unique(ontology_types)
Example #12
0
def moveOntologyToArchiveDir(display=True):
    ### Move any existing OBO files to an archived directory as to not combine new with old annotations
    program_type,database_dir = unique.whatProgramIsThis(); parent_dir = ''
    if program_type == 'AltAnalyze': parent_dir = 'AltDatabase/goelite/'
    c = GrabFiles()
    c.setdirectory('/'+parent_dir+'OBO')
    file_dirs = c.searchdirectory('.ontology')+c.searchdirectory('.obo')
    
    for file_dir in file_dirs:
        new_file_dir = string.replace(file_dir,parent_dir+'OBO/',parent_dir+'OBO/archive/')
        if display: print 'Moving:',file_dir,'to:',new_file_dir
        export.customFileMove(file_dir,new_file_dir)
Example #13
0
def exportVersionData(version,version_date,dir):
    """Write '<version>\\t<version_date>' to '<dir>version.txt' (used by the module UI).

    The directory is prefixed with 'AltDatabase/goelite/' for AltAnalyze,
    left as given when it contains 'OBO' or 'Config', and prefixed with the
    database directory otherwise. Also stores both values in the globals
    current_version and current_version_date.
    """
    global current_version
    global current_version_date
    program_type,database_dir = unique.whatProgramIsThis()
    if program_type == 'AltAnalyze':
        prefix = 'AltDatabase/goelite/'
    elif 'OBO' in dir or 'Config' in dir:
        prefix = ''
    else:
        prefix = database_dir
    version_file = prefix+dir+'version.txt'
    current_version = version
    current_version_date = version_date
    out = export.ExportFile(version_file)
    out.write(str(version)+'\t'+str(version_date)+'\n')
    out.close()
Example #14
0
def exportVersionData(version,version_date,dir):
    """Write '<version>\\t<version_date>' to '<dir>version.txt' (used by the module UI).

    The directory is prefixed with 'AltDatabase/goelite/' for AltAnalyze,
    left as given when it contains 'OBO' or 'Config', and prefixed with the
    database directory otherwise. Also stores both values in the globals
    current_version and current_version_date.
    """
    global current_version
    global current_version_date
    program_type,database_dir = unique.whatProgramIsThis()
    if program_type == 'AltAnalyze':
        prefix = 'AltDatabase/goelite/'
    elif 'OBO' in dir or 'Config' in dir:
        prefix = ''
    else:
        prefix = database_dir
    version_file = prefix+dir+'version.txt'
    current_version = version
    current_version_date = version_date
    out = export.ExportFile(version_file)
    out.write(str(version)+'\t'+str(version_date)+'\n')
    out.close()
Example #15
0
def importVersionData(dir):
    """Return the date stored in '<dir>version.txt'.

    The directory is prefixed with 'AltDatabase/goelite/' for AltAnalyze.
    Each line must hold two tab-separated fields (version, date); the date
    of the last line is stored in the global OBO_date and returned.

    NOTE(review): if the file has no lines, OBO_date is not assigned here, so
    the return uses any earlier global value or raises NameError - confirm
    callers never hit an empty file.
    """
    global OBO_date
    program_type, database_dir = unique.whatProgramIsThis()
    parent_dir = ''
    if program_type == 'AltAnalyze': parent_dir = 'AltDatabase/goelite/'
    dir = parent_dir + dir

    filename = dir + 'version.txt'
    fn = filepath(filename)
    ### Context manager closes the handle even if a malformed line raises
    with open(fn, 'r') as version_file:
        for line in version_file:
            data = cleanUpLine(line)
            OBO_version, OBO_date = data.split('\t')
    return OBO_date
Example #16
0
def exportOntologyRelationships(nested_ontology_gene,gene_to_source_id,mod,source_type,ontology_type):
    program_type,database_dir = unique.whatProgramIsThis()
    if ontology_type == 'GeneOntology': ontology_type = 'GO'
    new_file = database_dir+'/'+species_code+'/nested/'+mod+'_to_Nested-'+ontology_type+'.txt'
    data = export.ExportFile(new_file)
    title = [mod,'ontology_id']; title_str = string.join(title,'\t')
    data.write(title_str+'\n')
    for ontology_id in nested_ontology_gene:
        for gene in nested_ontology_gene[ontology_id]:
            output_list = [gene,ontology_id]
            output_str = string.join(output_list,'\t')
            data.write(output_str+'\n')
    data.close()
    print new_file, 'saved to disk'
Example #17
0
def moveOntologyToArchiveDir(display=True):
    ### Move any existing OBO files to an archived directory as to not combine new with old annotations
    program_type, database_dir = unique.whatProgramIsThis()
    parent_dir = ''
    if program_type == 'AltAnalyze': parent_dir = 'AltDatabase/goelite/'
    c = GrabFiles()
    c.setdirectory('/' + parent_dir + 'OBO')
    file_dirs = c.searchdirectory('.ontology') + c.searchdirectory('.obo')

    for file_dir in file_dirs:
        new_file_dir = string.replace(file_dir, parent_dir + 'OBO/',
                                      parent_dir + 'OBO/archive/')
        if display: print 'Moving:', file_dir, 'to:', new_file_dir
        export.customFileMove(file_dir, new_file_dir)
Example #18
0
def exportOntologyRelationships(nested_ontology_gene,gene_to_source_id,mod,source_type,ontology_type):
    program_type,database_dir = unique.whatProgramIsThis()
    if ontology_type == 'GeneOntology': ontology_type = 'GO'
    new_file = database_dir+'/'+species_code+'/nested/'+mod+'_to_Nested-'+ontology_type+'.txt'
    data = export.ExportFile(new_file)
    title = [mod,'ontology_id']; title_str = string.join(title,'\t')
    data.write(title_str+'\n')
    for ontology_id in nested_ontology_gene:
        for gene in nested_ontology_gene[ontology_id]:
            output_list = [gene,ontology_id]
            output_str = string.join(output_list,'\t')
            data.write(output_str+'\n')
    data.close()
    print new_file, 'saved to disk'
Example #19
0
def sourceData():
    """Load 'Config/source_data.txt' into the module-level source tables.

    Each line is tab-separated: source name, optional system code, optional
    third field. Resets and fills the globals mod_types (sources whose third
    field is 'MOD'), source_types (every source not in mod_types at the time
    its line is read) and system_codes (system code -> source, with 'NuLL'
    standing in for a missing code).
    """
    global source_types; global system_codes; global mod_types
    program_type,database_dir = unique.whatProgramIsThis()
    fn=filepath('Config/source_data.txt')
    source_types=[]; system_codes={}; mod_types=[]
    ### Stream the file and close it deterministically instead of leaking the handle
    with open(fn,'rU') as source_file:
        for line in source_file:
            data = cleanUpLine(line)
            t = data.split('\t'); source=t[0]
            system_code = t[1] if len(t)>1 else 'NuLL'
            ### A third field marks this ID system as a potential MOD
            if len(t)>2 and t[2] == 'MOD': mod_types.append(source)
            if source not in mod_types: source_types.append(source)
            system_codes[system_code] = source ###Used when users include system code data in their input file
Example #20
0
def verifyNestedFileCreation(species,mod_types,ontology_type):
    """Return 'yes' if every MOD with gene-go data also has a nested file, else 'no'.

    A file counts as populated when verifyFileLength reports more than 1.
    'no' is also returned when no MOD has a populated gene-go file.
    """
    program_type,database_dir = unique.whatProgramIsThis()
    species_dir = database_dir+'/'+species
    ### Determine which mods are present for Ontology
    mods_present = []
    for mod in mod_types:
        gene_go_file = species_dir+'/gene-go/'+mod+'-'+ontology_type+'.txt'
        if verifyFileLength(gene_go_file)>1:
            mods_present.append(mod)
    if not mods_present:
        return 'no'
    ### Nested files are looked up with 'GO' in place of 'GeneOntology'
    nested_type = 'GO' if ontology_type == 'GeneOntology' else ontology_type
    nested_count = 0
    for mod in mods_present:
        nested_file = species_dir+'/nested/'+mod+'_to_Nested-'+nested_type+'.txt'
        if verifyFileLength(nested_file)>1:
            nested_count += 1
    if nested_count == len(mods_present):
        return 'yes'
    return 'no'
Example #21
0
def verifyNestedFileCreation(species,mod_types,ontology_type):
    """Return 'yes' if every MOD with gene-go data also has a nested file, else 'no'.

    A file counts as populated when verifyFileLength reports more than 1.
    'no' is also returned when no MOD has a populated gene-go file.
    """
    program_type,database_dir = unique.whatProgramIsThis()
    species_dir = database_dir+'/'+species
    ### Determine which mods are present for Ontology
    mods_present = []
    for mod in mod_types:
        gene_go_file = species_dir+'/gene-go/'+mod+'-'+ontology_type+'.txt'
        if verifyFileLength(gene_go_file)>1:
            mods_present.append(mod)
    if not mods_present:
        return 'no'
    ### Nested files are looked up with 'GO' in place of 'GeneOntology'
    nested_type = 'GO' if ontology_type == 'GeneOntology' else ontology_type
    nested_count = 0
    for mod in mods_present:
        nested_file = species_dir+'/nested/'+mod+'_to_Nested-'+nested_type+'.txt'
        if verifyFileLength(nested_file)>1:
            nested_count += 1
    if nested_count == len(mods_present):
        return 'yes'
    return 'no'
Example #22
0
def sourceData():
    """Load 'Config/source_data.txt' into the module-level source tables.

    Each line is tab-separated: source name, optional system code, optional
    third field. Resets and fills the globals mod_types (sources whose third
    field is 'MOD'), source_types (every source not in mod_types at the time
    its line is read) and system_codes (system code -> source, with 'NuLL'
    standing in for a missing code).
    """
    global source_types; global system_codes; global mod_types
    program_type,database_dir = unique.whatProgramIsThis()
    fn=filepath('Config/source_data.txt')
    source_types=[]; system_codes={}; mod_types=[]
    ### Stream the file and close it deterministically instead of leaking the handle
    with open(fn,'rU') as source_file:
        for line in source_file:
            data = cleanUpLine(line)
            t = data.split('\t'); source=t[0]
            system_code = t[1] if len(t)>1 else 'NuLL'
            ### A third field marks this ID system as a potential MOD
            if len(t)>2 and t[2] == 'MOD': mod_types.append(source)
            if source not in mod_types: source_types.append(source)
            system_codes[system_code] = source ###Used when users include system code data in their input file
Example #23
0
def importPreviousOntologyAnnotations(target_ontology_type):
    """Load a previously exported annotation build.

    Reads 'OBO/builds/<type>_annotations.txt' (prefixed with
    'AltDatabase/goelite/' for AltAnalyze; 'GeneOntology' is read as 'go'),
    skipping the title line. Each remaining line must hold three
    tab-separated fields: ontology ID, term name, ontology type.

    Returns a dict mapping ontology ID (prefixed with 'GO:' when it has no
    ':') to an OntologyTree built from the three fields.
    """
    ontology_annotations={}
    program_type,database_dir = unique.whatProgramIsThis(); parent_dir = ''
    if program_type == 'AltAnalyze': parent_dir = 'AltDatabase/goelite/'
    if target_ontology_type == 'GeneOntology': target_ontology_type = 'go'
    filename = parent_dir+'OBO/builds/'+target_ontology_type+'_annotations.txt'
    fn=filepath(filename)
    title_skipped = False
    with open(fn,'r') as annotation_file:
        for line in annotation_file:
            if not title_skipped:
                title_skipped = True ###Skip the title line
                continue
            data = cleanUpLine(line)
            ontology_id,ontology_name,ontology_type = data.split('\t')
            if ':' not in ontology_id: ontology_id = 'GO:'+ontology_id
            ### Slice rather than index so an empty term name cannot raise IndexError
            if ontology_name[:1] == ' ': ontology_name = ontology_name[1:]
            s = OntologyTree(ontology_id,ontology_name,ontology_type)
            ontology_annotations[ontology_id] = s
    return ontology_annotations
Example #24
0
def importPreviousOntologyAnnotations(target_ontology_type):
    """Load a previously exported annotation build.

    Reads 'OBO/builds/<type>_annotations.txt' (prefixed with
    'AltDatabase/goelite/' for AltAnalyze; 'GeneOntology' is read as 'go'),
    skipping the title line. Each remaining line must hold three
    tab-separated fields: ontology ID, term name, ontology type.

    Returns a dict mapping ontology ID (prefixed with 'GO:' when it has no
    ':') to an OntologyTree built from the three fields.
    """
    ontology_annotations={}
    program_type,database_dir = unique.whatProgramIsThis(); parent_dir = ''
    if program_type == 'AltAnalyze': parent_dir = 'AltDatabase/goelite/'
    if target_ontology_type == 'GeneOntology': target_ontology_type = 'go'
    filename = parent_dir+'OBO/builds/'+target_ontology_type+'_annotations.txt'
    fn=filepath(filename)
    title_skipped = False
    with open(fn,'r') as annotation_file:
        for line in annotation_file:
            if not title_skipped:
                title_skipped = True ###Skip the title line
                continue
            data = cleanUpLine(line)
            ontology_id,ontology_name,ontology_type = data.split('\t')
            if ':' not in ontology_id: ontology_id = 'GO:'+ontology_id
            ### Slice rather than index so an empty term name cannot raise IndexError
            if ontology_name[:1] == ' ': ontology_name = ontology_name[1:]
            s = OntologyTree(ontology_id,ontology_name,ontology_type)
            ontology_annotations[ontology_id] = s
    return ontology_annotations
Example #25
0
def importPreviousOntologyBuild(ontology_type,display=True):
    program_type,database_dir = unique.whatProgramIsThis(); parent_dir = ''
    if program_type == 'AltAnalyze': parent_dir = 'AltDatabase/goelite/'
    if ontology_type == 'GeneOntology': ontology_type = 'go'
    filename = parent_dir+'OBO/builds/built_'+ontology_type+'_paths.txt'; fn=filepath(filename); x=0; count=0

    for line in open(fn,'r').xreadlines(): count+=1
    original_increment = int(count/10); increment = original_increment
    
    try: ### This reduces run-time for the typical analysis where the databases are in sync and up-to-date
        if run_mappfinder == 'yes':
            if verified_nested == 'no':
                build_nestedDB='yes'
            else: build_nestedDB = 'no'
        else: build_nestedDB = 'no'
    except Exception: build_nestedDB = 'yes'
            
    for line in open(fn,'r').xreadlines():
        if x==0: x+=1 ###Skip the title line
        else:
            x+=1
            if x == increment and display: increment+=original_increment; print '*',    
            data = cleanUpLine(line)
            path,ontology_id = string.split(data,'\t')
            path = tuple(map(int,string.split(path,'.')))
            #path = string.split(path_str,'.'); path = convertStrListToIntList(path); path = tuple(path)
            #s = OntologyPath(ontology_id,'','','',path,''); s = OntologyPathAbr(ontology_id,path)
            if ':' not in ontology_id: ontology_id = 'GO:'+ontology_id
            path_ontology_db[path] = ontology_id
            try: built_ontology_paths[ontology_id].append(path)
            except KeyError: built_ontology_paths[ontology_id] = [path]
            if build_nestedDB == 'yes':
                path_dictionary[path]=[path]
    ###All of the paths need to be added before  
    if build_nestedDB == 'yes':
        if build_nestedDB == 'yes':
            for path in path_dictionary:
                ###Build nested Path-index
                path_len = len(path); i=-1
                while path_len+i > 0:
                    parent_path = path[:i]
                    try: path_dictionary[parent_path].append(path)
                    except Exception: null=[]
                    i-=1    
Example #26
0
def exportCurrentOntologyBuild(path_ontology_db,ontology_annotations,ontology_type, display=False):
    """Write the current ontology build to two tab-delimited text files.

    Creates 'OBO/builds/built_<ontology_type>_paths.txt' (path, ontology ID)
    and 'OBO/builds/<ontology_type>_annotations.txt' (ontology ID, term name,
    term type). Both paths are prefixed with 'AltDatabase/goelite/' when
    unique.whatProgramIsThis() reports 'AltAnalyze'.

    path_ontology_db: mapping of path keys (rendered with pathToString) to
        ontology ID strings.
    ontology_annotations: mapping of ontology ID to an object providing
        OntologyTerm() and OntologyType().
    ontology_type: string embedded in both output file names.
    display: not used by this function; kept for interface compatibility.
    """
    program_type,database_dir = unique.whatProgramIsThis(); parent_dir = ''
    if program_type == 'AltAnalyze': parent_dir = 'AltDatabase/goelite/'
    new_file = parent_dir+'OBO/builds/built_'+ontology_type+'_paths.txt'
    try: data = open(filepath(new_file),'w')
    except Exception:
        new_dir = parent_dir+'OBO/builds'
        os.mkdir(filepath(new_dir)) ###Re-Create directory if deleted
        data = open(filepath(new_file),'w')
    try: ### Guarantee the handle is released even if a write fails
        data.write('Path'+'\t'+'ontology_id'+'\n')
        for path in path_ontology_db:
            ontology_id = path_ontology_db[path]; path_str = pathToString(path)
            data.write(path_str +'\t'+ ontology_id +'\n')
    finally:
        data.close()

    new_file = parent_dir+'OBO/builds/'+ontology_type+'_annotations.txt'
    data = open(filepath(new_file),'w')
    try:
        data.write('ontology_id'+'\t'+'Ontology Name'+'\t'+'Ontology Type'+'\n')
        for ontology_id in ontology_annotations:
            s = ontology_annotations[ontology_id]
            data.write(ontology_id +'\t'+ s.OntologyTerm() +'\t'+ s.OntologyType() +'\n')
    finally:
        data.close()
Example #27
0
def buildNestedOntologyTree(mappfinder,display=True):
    program_type,database_dir = unique.whatProgramIsThis(); parent_dir = ''
    if program_type == 'AltAnalyze': parent_dir = 'AltDatabase/goelite/'
    
    global run_mappfinder; run_mappfinder = mappfinder
    ###Import all the OBO Ontology tree information from http:/www.geneontology.org/
    import_dir = '/'+parent_dir+'OBO'; global Ontology_version; path=[]; rank=0
    c = GrabFiles(); c.setdirectory(import_dir)
    file_dirs = c.searchdirectory('.ontology')
    file_dirs += c.searchdirectory('.obo')
    file_dirs.reverse()
    x = file_dirs[1:]+file_dirs[0:1] ###Reorganize to mimic GenMAPP order
    start_time = time.time()
    ontology_type = ''
    #print file_dirs
    for file_dir in file_dirs:
        try:
            if '.obo' in file_dir or '.ontology' in file_dir:
                if 'gene_ontology' in file_dir or 'goslim' in file_dir:
                    ontology_type = 'GeneOntology'
                    if 'goslim' in file_dir: ontology_type = 'GOSlim'
                    ###Import the 3 main Ontology files and index them so that the first path corresponds to the Ontology type - Software checks the date before parsing
                    path_ontology_db,built_ontology_paths,ontology_annotations,path_dictionary,path,rank = importOBONew(file_dir,path,'biological_process',rank)
                    try: path_ontology_db,built_ontology_paths,ontology_annotations,path_dictionary,path,rank = importOBONew(file_dir,path,'molecular_function',rank)
                    except Exception: null=[] ### Sometimes missing from GO-Slim
                    path_ontology_db,built_ontology_paths,ontology_annotations,path_dictionary,path,rank = importOBONew(file_dir,path,'cellular_component',rank)
                else:
                    ontology_type = getOntologyType(file_dir)
                    path_ontology_db,built_ontology_paths,ontology_annotations,path_dictionary,path,rank = importOBONew(file_dir,path,'',rank)
                deleteNestedOntologyFiles(ontology_type) ### Necessary to trigger an update for all species
            else:
                if display: print 'The ontology format present in',file_dir,'is no longer supported.'
            exportCurrentOntologyBuild(path_ontology_db,ontology_annotations,ontology_type,display=display)
        except Exception:
            pass ### If an Ontology file fails download, it still may create an empty file that will screw up the processing of other obo files - just skip it
    end_time = time.time(); time_diff = int(end_time-start_time)
    
    if display: print "Ontology categories imported and nested in %d seconds" % time_diff
Example #28
0
def buildAccessoryPathwayDatabases(selected_species,additional_resources,force):
    global database_dir
    global program_type
    program_type,database_dir = unique.whatProgramIsThis()
    
    buildInferrenceTables(selected_species) ### Make sure these tables are present first!!!
        
    #print 'Attempting to update:', string.join(additional_resources,',')
    if 'KEGG' in additional_resources:
        try: importKEGGAssociations(selected_species,force)
        except Exception: print 'KEGG import failed (cause unknown)'
    if 'Transcription Factor Targets' in additional_resources:
        try: importTranscriptionTargetAssociations(selected_species,force)
        except Exception: print 'Transcription Factor Targets import failed (cause unknown)'
    if 'Phenotype Ontology' in additional_resources:
        try: importPhenotypeOntologyData(selected_species,force)
        except Exception: print 'Phenotype Ontology import failed (cause unknown)'
    if 'Disease Ontology' in additional_resources:
        try: importDiseaseOntologyAssociations(selected_species,force)
        except Exception: print 'Disease Ontology import failed (cause unknown)'
    if 'GOSlim' in additional_resources:
        try: importGOSlimAssociations(selected_species,force)
        except Exception: print 'GOSlim import failed (cause unknown)'
    if 'miRNA Targets' in additional_resources:
        try: importMiRAssociations(selected_species,force)
        except Exception: print 'miRNA Targets import failed (cause unknown)'
    if 'BioMarkers' in additional_resources:
        try: importBioMarkerAssociations(selected_species,force)
        except Exception: print 'BioMarkers import failed (cause unknown)'#,traceback.format_exc()
    if 'Domains' in additional_resources:
        try: importDomainAssociations(selected_species,force)
        except Exception: print 'Domains import failed (cause unknown)'
    if 'PathwayCommons' in additional_resources:
        try: importPathwayCommons(selected_species,force)
        except Exception: print 'PathwayCommons import failed (cause unknown)'
    if 'Latest WikiPathways' in additional_resources:
        try: importWikiPathways(selected_species,force)
        except Exception: print 'WikiPathways import failed (cause unknown)'
Example #29
0
def generateMAPPFinderScores(species_title, species_id, source, mod_db,
                             system_Codes, permute, resources_to_analyze,
                             file_dirs, parent_root):
    global mappfinder_output_dir
    global custom_sets_folder
    global root
    root = parent_root
    global mapp_to_mod_genes
    global ontology_to_mod_genes
    global system_codes
    system_codes = system_Codes
    criterion_input_folder, criterion_denom_folder, output_dir, custom_sets_folder = file_dirs
    previous_denominator_file_dir = ''
    ontology_to_mod_genes = {}
    mapp_to_mod_genes = {}
    global test
    test = 'no'
    program_type, database_dir = unique.whatProgramIsThis()
    if resources_to_analyze == 'Gene Ontology':
        resources_to_analyze = 'GeneOntology'

    if len(output_dir) == 0: mappfinder_output_dir = 'input/MAPPFinder'
    else:
        mappfinder_output_dir = output_dir + '/GO-Elite_results/CompleteResults/ORA'

    global source_data
    source_data = source
    global mod
    mod = mod_db
    global species_code
    species_code = species_id
    global species_name
    species_name = species_title
    global gene_to_mapp
    global permutations
    permutations = permute
    global eliminate_redundant_genes
    eliminate_redundant_genes = 'yes'
    global permuted_z_scores
    global ontology_annotations
    global original_ontology_z_score_data
    global original_mapp_z_score_data
    global input_gene_list
    global denominator_gene_list
    global gene_file
    global denom_file_status
    global input_count
    global denom_count
    global gene_annotations
    global source_to_gene
    global use_FET
    if permutations == "FisherExactTest":
        use_FET = 'yes'  ### Use Fisher's Exact test instead of permutation-based p-values
        permutations = 0
    else:
        use_FET = 'no'

    start_time = time.time()

    gene_annotations = gene_associations.importGeneData(species_code, mod)

    OBO_date = importVersionData('OBO/')
    if len(criterion_input_folder) == 0:
        import_dir = '/input/GenesToQuery/' + species_code
        import_dir_alt = import_dir[1:]
    else:
        import_dir = criterion_input_folder
        import_dir_alt = criterion_input_folder
    m = GrabFiles()
    m.setdirectory(import_dir)
    try:
        dir_list = readDirText(
            import_dir
        )  #send a sub_directory to a function to identify all files in a directory
    except Exception:
        print_out = 'Warning! Input directory location is not a valid folder. Exiting GO-Elite.'
        ForceCriticalError(print_out)
    try:
        denom_dir_list = readDirText(criterion_denom_folder)
    except Exception:
        print_out = 'Warning! Denominator directory location is not a valid folder. Exiting GO-Elite.'
        ForceCriticalError(print_out)
    if len(dir_list) == 0:
        error_message = 'No files with the extension ".txt" found in the input directory.'
        ForceCriticalError(error_message)
    if len(denom_dir_list) == 0:
        error_message = 'No files with the extension ".txt" found in the denominator directory.'
        ForceCriticalError(error_message)

    inputs_analyzed = 0
    for mappfinder_input in dir_list:  #loop through each file in the directory
        permuted_z_scores = {}
        original_ontology_z_score_data = {}
        original_mapp_z_score_data = {}
        print 'Performing over-representation analysis (ORA) on', mappfinder_input
        gene_file_dir, gene_file = m.searchdirectory(mappfinder_input)
        ###Import Input gene/source-id lists
        input_gene_list, source_data_input, error_message = gene_associations.importUIDsForMAPPFinderQuery(
            import_dir_alt + '/' + gene_file, system_codes, 'no')
        input_count = len(input_gene_list)
        if 'WARNING!!!' in error_message:  ### Warn the user about SwissProt issues when importing the denominator
            ForceCriticalError(error_message)
        if len(criterion_denom_folder) == 0:
            denom_folder = '/input/GenesToQuery/' + species_code + '/DenominatorGenes'
        else:
            denom_folder = criterion_denom_folder
        error_warning = "\nThe directory\n" + '[' + denom_folder + ']' + "\nwas not found. Please create the directory\nand place an appropriate denominator file\nor files in it."
        denominator_file_dir = identifyGeneFiles(
            denom_folder,
            gene_file)  ###input is in input\Genes, denominator in
        try:
            denominator_file_dir = identifyGeneFiles(
                denom_folder,
                gene_file)  ###input is in input\Genes, denominator in
            denominator_file = string.split(denominator_file_dir, '/')[-1]
            print 'Using:', denominator_file, 'for the denominator.'
        except Exception:
            print_out = "WARNING: No denominator file included in\nthe Denominator directory.\nTo proceed, place all denominator\nIDs in a file in that directory."
            ForceCriticalError(print_out)
        if denominator_file_dir == previous_denominator_file_dir:
            denom_file_status = 'old'
        else:
            denom_file_status = 'new'
        if denom_file_status == 'new':
            previous_denominator_file_dir = denominator_file_dir
            denominator_gene_list, source_data_denom, error_message = gene_associations.importUIDsForMAPPFinderQuery(
                denominator_file_dir, system_codes, 'no')
            denom_count = len(denominator_gene_list)
            if 'SwissProt' in error_message and 'WARNING!!!' not in error_message:
                if len(input_gene_list) == 0:
                    error_message += '\nNo valid input IDs found. Exiting GO-Elite.'
                    try:
                        UI.WarningWindow(
                            error_message, 'Warning!!! Identifier Error'
                        )  ### Only warn, don't force an exit (if SwissProt full IDs are present)
                    except Exception:
                        None
                    sys.exit()
                else:
                    try:
                        UI.WarningWindow(
                            error_message, 'Warning!!! Identifier Error'
                        )  ### Only warn, don't force an exit (if SwissProt full IDs are present)
                    except Exception:
                        None
            elif len(error_message) > 0:
                ForceCriticalError(error_message)
            if len(denominator_gene_list) == len(input_gene_list):
                print_out = 'Input and Denominator lists have identical counts.\nPlease load a propper denominator set (containing\nthe input list with all assayed gene IDs) before proceeding.'
                ForceCriticalError(print_out)
            original_denominator_gene_list = []
            for id in denominator_gene_list:
                original_denominator_gene_list.append(
                    id
                )  ###need this to be a valid list not dictionary for permutation analysis
        if len(source_data_input) > 0:
            source_data = source_data_input  ###over-ride source_data if a source was identified from the input file
        if source_data != mod:
            if denom_file_status == 'new':
                mod_source = mod + '-' + source_data + '.txt'
                #checkDenominatorMatchesInput(input_gene_list,denominator_gene_list,gene_file) ###This is checked for the source IDs not associated MOD IDs
                try:
                    gene_to_source_id = gene_associations.getGeneToUid(
                        species_code, mod_source)
                    print mod_source, 'imported'
                except Exception:
                    try:
                        if mod == 'EntrezGene': mod = 'Ensembl'
                        else: mod = 'EntrezGene'
                        print 'The primary system (MOD) has been switched from', mod_db, 'to', mod, '\n(' + mod_db, 'not supported for the %s ID system).' % source_data
                        mod_source = mod + '-' + source_data + '.txt'
                        gene_to_source_id = gene_associations.getGeneToUid(
                            species_code, mod_source)
                    except Exception:
                        print_out = "WARNING: The primary gene ID system '" + mod + "'\ndoes not support relationships with '" + source_data + "'.\nRe-run using a supported primary ID system."
                        ForceCriticalError(print_out)
                source_to_gene = OBO_import.swapKeyValues(gene_to_source_id)
                denominator_gene_list = associateInputSourceWithGene(
                    source_to_gene, denominator_gene_list)
                ### Introduced the below method in version 1.21 to improve permutation speed (no longer need to search all source IDs)
                ### Only includes source ID to gene relationships represented in the denominator file (needed for Affymetrix)
                source_to_gene = OBO_import.swapKeyValues(
                    denominator_gene_list)
            ###Replace input lists with corresponding MOD IDs
            input_gene_list = associateInputSourceWithGene(
                source_to_gene, input_gene_list)
        checkDenominatorMatchesInput(
            input_gene_list, denominator_gene_list,
            gene_file)  ###This is for only the associated MOD IDs

        gd = GrabFiles()
        gd.setdirectory('/' + database_dir + '/' + species_code + '/gene-mapp')
        available_genesets = reorganizeResourceList(gd.getAllFiles(mod))
        od = GrabFiles()
        od.setdirectory('/' + database_dir + '/' + species_code + '/gene-go')
        available_ontologies = reorganizeResourceList(od.getAllFiles(mod))

        input_gene_count = len(
            input_gene_list
        )  ###Count number of genes associated with source input IDs
        if len(input_gene_list) == 0 or len(denominator_gene_list) == 0:
            if len(input_gene_list) == 0:
                print_out = 'WARNING!!!! None of the input IDs provided map to genes for ' + mappfinder_input + '. Check to make sure the selected species is correct.'
                print_out += '\nSelected species: ' + species_name
                print_out += '\nInput ID system: ' + str(source_data_input)
                print_out += '\nPrimary ID system (MOD): ' + str(mod)
                ForceCriticalError(print_out)
            if len(denominator_gene_list) == 0:
                print_out = 'WARNING!!!! None of the denominator IDs provided map to genes for ' + denominator_file_dir + '. Check to make sure the selected species is correct.'
                print_out += '\nSelected species: ' + species_name
                print_out += '\nDenominator ID system: ' + str(source)
                print_out += '\nPrimary ID system (MOD):' + str(mod)
                ForceCriticalError(print_out)
        elif len(available_ontologies) == 0 and len(available_genesets) == 0:
            print_out = 'WARNING!!!! No Ontology or GeneSets appear to be available for this species. Please supply and re-analyze.'
            ForceCriticalError(print_out)
        else:
            """ Perform permutation analysis and ORA on available GeneSets or Ontologies"""
            inputs_analyzed += 1

            global permute_inputs
            permute_inputs = []
            if permutations != 0 or use_FET == 'no':
                buildPermutationDatabase(original_denominator_gene_list,
                                         input_count)

            run_status = 0
            ### Analyzed ontologies
            if len(available_ontologies) > 0:
                print '    Analyzing input ID list with available ontologies'
            for ontology_dir in available_ontologies:
                ontology_type = getResourceType(ontology_dir)
                permuted_z_scores = {}
                original_ontology_z_score_data = {}
                #print ontology_type, resources_to_analyze
                if resources_to_analyze == ontology_type or resources_to_analyze == 'all':
                    ontology_annotations = importOntologyAnnotations(
                        species_code, ontology_type)
                    if ontology_annotations != None:  ### Occurs when the files are named or formatted correctly
                        status, ontology_to_mod_genes = performOntologyORA(
                            ontology_dir)
                        run_status += status

            ### Analyzed gene-sets
            if len(available_genesets) > 0:
                print '    Analyzing input ID list with available gene-sets'
            for geneset_dir in available_genesets:
                geneset_type = getResourceType(geneset_dir)
                permuted_z_scores = {}
                original_mapp_z_score_data = {}
                if resources_to_analyze == geneset_type or resources_to_analyze == 'all':
                    status, mapp_to_mod_genes = performGeneSetORA(geneset_dir)
                    run_status += status
            if len(custom_sets_folder) > 0:
                ### Hence - Analyze User Supplied GeneSets
                permuted_z_scores = {}
                original_mapp_z_score_data = {}
                run_status += performGeneSetORA('UserSuppliedAssociations')[0]

            permute_inputs = []
            permute_mapp_inputs = []
            ontology_input_gene_count = []
            mapp_input_gene_count = []

            if run_status == 0:
                ### Returns the number of successfully analyzed gene-set databases
                program_type, database_dir = unique.whatProgramIsThis()
                print_out = "Warning!!! Either the MOD you have selected: " + mod + "\nis missing the appropriate relationshipfiles necessary to run GO-Elite\nor you have selected an invalid resource to analyze.  Either replace\nthe missing MOD files in " + database_dir + '/' + species_code + ' sub-directories or\nselect a different MOD at run-time.'
                ForceCriticalError(print_out)

    end_time = time.time()
    time_diff = formatTime(start_time, end_time)
    print 'ORA analyses finished in %s seconds' % time_diff
    return ontology_to_mod_genes, mapp_to_mod_genes  ###Return the MOD genes associated with each GO term and MAPP