Example #1
0
def justConvertFilenames(species, outputdir):
    """Rename plot files in ``outputdir`` so they start with a gene symbol.

    A directory entry is considered when its name contains ``.pdf`` or
    ``.png``. The name, with those substrings removed, is cut at the first
    ``__``; if the leading piece is a key of the Ensembl-Symbol table the file
    is renamed to ``<symbol>-SashimiPlot_<field>``, where ``<field>`` is the
    second ``__``-delimited field of the original name (or the second
    ``\\``- / ``/``-delimited field, or the whole name when none of those
    delimiters occur). Rename failures are ignored.
    """
    import gene_associations

    gene_to_symbol = gene_associations.getGeneToUid(species, ("hide", "Ensembl-Symbol"))
    import OBO_import

    # The reverse table is built here but nothing below reads it.
    symbol_to_gene = OBO_import.swapKeyValues(gene_to_symbol)

    for filename in os.listdir(outputdir):
        if not (".pdf" in filename or ".png" in filename):
            continue

        stem = filename.replace(".pdf", "").replace(".png", "")
        gene_id = stem.split("__")[0]
        if gene_id not in gene_to_symbol:
            continue

        # Default to the full name; otherwise take the field after the first
        # delimiter that is present, checked in priority order.
        suffix = str(filename)
        for delimiter in ("__", "\\", "/"):
            if delimiter in filename:
                suffix = filename.split(delimiter)[1]
                break

        renamed = gene_to_symbol[gene_id][0] + "-SashimiPlot_" + suffix
        try:
            os.rename(os.path.join(outputdir, filename), os.path.join(outputdir, renamed))
        except Exception:
            pass
Example #2
0
def justConvertFilenames(species, outputdir):
    """Prefix SashimiPlot image files in ``outputdir`` with the gene symbol.

    Only entries whose name contains '.pdf' or '.png' are examined. The text
    before the first '__' (after dropping those two substrings) must be a key
    of the Ensembl-Symbol table; such files are renamed to
    '<symbol>-SashimiPlot_<field>', where <field> is the second field of the
    original name when split on '__', on a backslash or on '/', whichever of
    these is found first in that order, and the complete name otherwise.
    Errors raised by the rename itself are swallowed.
    """
    import gene_associations
    gene_to_symbol = gene_associations.getGeneToUid(
        species, ('hide', 'Ensembl-Symbol'))
    import OBO_import
    # Built exactly as before; no later statement consults it.
    symbol_to_gene = OBO_import.swapKeyValues(gene_to_symbol)

    plot_files = [
        entry for entry in os.listdir(outputdir)
        if '.pdf' in entry or '.png' in entry
    ]
    for filename in plot_files:
        base = filename
        for extension in ('.pdf', '.png'):
            base = base.replace(extension, '')
        ensembl_id = base.split('__')[0]
        if ensembl_id not in gene_to_symbol:
            continue

        remainder = str(filename)
        # Delimiters in priority order; only the first one present is used.
        present = [sep for sep in ('__', '\\', '/') if sep in filename]
        if present:
            remainder = filename.split(present[0])[1]

        target = gene_to_symbol[ensembl_id][0] + '-SashimiPlot_' + remainder
        source_path = os.path.join(outputdir, filename)
        try:
            os.rename(source_path, os.path.join(outputdir, target))
        except Exception:
            pass
Example #3
0
def filterRows(input_file,output_file,filterDB=None,logData=False):
    orderlst={}
    counter=[]
    export_object = open(output_file,'w')
    firstLine = True
    Flag=0;
    species="Hs"
    import OBO_import; import ExpressionBuilder
    gene_to_symbol_db = ExpressionBuilder.importGeneAnnotations(species)
    symbol_to_gene = OBO_import.swapKeyValues(gene_to_symbol_db)
    
    for line in open(input_file,'rU').xreadlines():
        flag1 = 0
        data = cleanUpLine(line)
        values = string.split(data,'\t')
    
        if firstLine:
            firstLine = False
            if Flag==0:
                export_object.write(line)
        else:
            try: symbolID = gene_to_symbol_db[values[0]][0]
            except Exception: symbolID = values[0]
            if symbolID in filterDB:
                counter=[index for index, value in enumerate(filterDB) if value == symbolID]
                for it in range(0,len(counter)):
                    orderlst[counter[it]]=line
    try:
        for i in range(0,len(orderlst)):
            export_object.write(orderlst[i])
    except Exception:
        print i,filterDB[i]

    export_object.close()
    print 'Filtered rows printed to:',output_file
Example #4
0
def sashmi_plot_list(bamdir,eventsToVisualizeFilename,PSIFilename,events=None):
    """ Export SashimiPlots for a set of splicing events or genes.

    The events are read from eventsToVisualizeFilename unless an events list is
    supplied; in that case entries whose leading field (text before '__') is a
    known gene symbol are replaced, in place, by the matching Ensembl gene ID.
    Plots are requested through formatAndSubmitSplicingEventsToSashimiPlot in up
    to three passes: the PSI file, then steady_state_exp_file for the events
    returned by getMopUpEvents, and - only if that second pass processed
    anything - the PSI file once more for the events still returned by
    getMopUpEvents.

    Reads the names species, root_dir and steady_state_exp_file, none of which
    is defined inside this function. Returns the Ensembl-to-symbol dictionary.
    """
    import gene_associations
    gene_to_symbol = gene_associations.getGeneToUid(species,('hide','Ensembl-Symbol'))
    import OBO_import; symbol_to_gene = OBO_import.swapKeyValues(gene_to_symbol)

    if events==None:
        splicing_events,expandedSearch = importSplicingEventsToVisualize(eventsToVisualizeFilename)
    else:
        ### Replace any ":" from the input events
        #for i in range(len(events)): events[i] = string.replace(events[i],':','__')
        expandedSearch = True
        
        ### NOTE(review): 'symbol' is assigned in this loop but never read afterwards in this function
        for i in range(len(events)):
            gene = string.split(events[i],'__')[0]
            if gene in gene_to_symbol:
                symbol = gene_to_symbol[gene][0]
            elif 'ENS' not in gene or 'G0000' in gene:
                if gene in symbol_to_gene:
                    ensID = symbol_to_gene[gene][0]
                    symbol = gene
                    events[i] = ensID ### translate this ID to an Ensembl gene ID for proper SashimiPlot lookup
        splicing_events = events ### optionally get from supplied variable

    if len(splicing_events)==0:
        print eventsToVisualizeFilename
        ### Bare reference to a name that is not defined in this function; unless it exists at module level
        ### this raises NameError - presumably a deliberate way to abort when there are no events (TODO confirm)
        forceNoCompatibleEventsInFile
    
    print 'Exporting plots',
    
    ### Determine Groups for Coloring
    groups_file = 'None'
    dir_list = unique.read_directory(root_dir+'/ExpressionInput')

    ### The last directory entry containing 'groups.' wins
    for file in dir_list:
         if 'groups.' in file:
            groups_file = root_dir+'/ExpressionInput/'+file

    ### NOTE(review): groups_file is always a string here (the default is the text 'None'), so this test is always true
    if groups_file != None:
        try:
            import ExpressionBuilder
            sample_group_db = ExpressionBuilder.simplerGroupImport(groups_file)
            groups=[]
            for sample in sample_group_db:
                if sample_group_db[sample] not in groups:
                    groups.append(sample_group_db[sample]) ### create an ordered list of unique group
        except Exception:
            ### NOTE(review): if the import above raised, sample_group_db is never bound and the call below fails with a NameError
            groups = ['None']
            #print traceback.format_exc()
            pass

    processed_events = formatAndSubmitSplicingEventsToSashimiPlot(PSIFilename, bamdir, splicing_events, sample_group_db, groups, False)
    mopup_events = getMopUpEvents(splicing_events, processed_events)

    ### Do the same for supplied gene queries or junctions that didn't map above using the gene expression values as a guide
    #print len(splicing_events),len(processed_events),len(mopup_events)
    processed_events = formatAndSubmitSplicingEventsToSashimiPlot(steady_state_exp_file,bamdir,mopup_events,sample_group_db,groups,expandedSearch)
    if len(processed_events)>0:
        mopup_events = getMopUpEvents(mopup_events, processed_events)
        processed_events = formatAndSubmitSplicingEventsToSashimiPlot(PSIFilename, bamdir, mopup_events, sample_group_db, groups, True)
    return gene_to_symbol
def importDataSimple(filename,input_type,MOD=None,Species=None):
    """ Import a tab-delimited ID/value file into a dictionary.

    For input_type 'LineageProfiler' each row becomes id -> list of floats taken
    from every column after the first. For any other input_type the second
    column is read as a system code and the third as the value ('+' and '-' are
    read as 1 and -1 when IDs are translated); IDs are kept as-is when the
    system is the MOD, otherwise they are translated through the
    <MOD>-<system>.txt relationships. Values that cannot be converted to float
    are stored as 'NA'.

    MOD and Species are only used when the names mod / species_code cannot be
    read. Returns (id_db, column_headers); column_headers is never assigned if
    the file contains no header line, in which case the return statement fails.
    """
    id_db={}
    fn = filepath(filename)
    x=0 ### 0 = header not read yet, 1 = next row is the first data row, >1 = later data rows
    for line in open(fn,'rU').xreadlines():         
        data = cleanUpLine(line)
        t = string.split(data,'\t')
        ### Skip '#' comment lines that precede the header
        ### NOTE(review): data[0] fails on an empty string - confirm cleanUpLine never returns one
        if data[0]=='#' and x==0: x=0
        elif x==0:
            column_headers = t[1:]
            if input_type != 'LineageProfiler':
                try: column_headers = t[2] ### exclude the ID, system code and p-value column headers
                except Exception: column_headers = 'NA'
            x=1
        else:
            if x==1 and input_type != 'LineageProfiler':
                ### get system conversions
                ### The system code of the first data row decides the ID translation for the entire file
                system_code = t[1]
                import GO_Elite
                import OBO_import
                system_codes,source_types,mod_types = GO_Elite.getSourceData()
                source_data = system_codes[system_code]
                try:
                    Mod=mod ### global established in upstream functions
                    speciescode = species_code
                except Exception:
                    ### Fall back on the supplied arguments when those names are not available
                    Mod=MOD
                    speciescode = Species
                if source_data == Mod:
                    source_is_mod = True
                else:
                    source_is_mod = False
                    mod_source = Mod+'-'+source_data+'.txt'
                    ### gene_associations is not imported in this function, so it has to be available at module level
                    gene_to_source_id = gene_associations.getGeneToUid(speciescode,('hide',mod_source))
                    source_to_gene = OBO_import.swapKeyValues(gene_to_source_id)
            if input_type != 'LineageProfiler':
                if source_is_mod == True:
                    try: id_db[t[0]] = float(t[2])
                    except Exception: id_db[t[0]] = 'NA'
                elif t[0] in source_to_gene:
                    mod_ids = source_to_gene[t[0]]
                    for mod_id in mod_ids:
                        try: value = t[2]
                        except Exception: value = 'NA'
                        if value == '+': value = 1
                        elif value == '-': value = -1
                        try: id_db[mod_id] = float(value) ### A MOD ID seen again on a later row is overwritten by that row's value
                        except Exception: id_db[mod_id] = 'NA'
                        break ### only the first MOD ID linked to this source ID is recorded
            else:
                id_db[t[0]]= map(float,t[1:]) ### Applies to LineageProfiler
            x+=1
    #print len(id_db),column_headers
    return id_db,column_headers
Example #6
0
def importDataSimple(filename, input_type):
    """ Import a tab-delimited ID/value file into a dictionary.

    For input_type 'LineageProfiler' each row becomes id -> list of floats taken
    from every column after the first. For any other input_type the second
    column is read as a system code and the third as a float value; IDs are kept
    as-is when the system equals mod, otherwise every MOD ID linked to the source
    ID through the <mod>-<system>.txt relationships receives the value.

    Reads the names mod, species_code and gene_associations, none of which is
    defined in this function. Returns (id_db, column_headers); column_headers is
    never assigned if the file contains no header line.
    """
    id_db = {}
    fn = filepath(filename)
    x = 0  ### 0 = next non-comment line is the header, 1 = first data row, >1 = later data rows
    for line in open(fn, 'rU').xreadlines():
        data = cleanUpLine(line)
        t = string.split(data, '\t')
        ### NOTE(review): any '#' line resets x, so a comment line after the header makes the
        ### following line be read as a new header - confirm this is intended
        if data[0] == '#': x = 0
        elif x == 0:
            column_headers = t[1:]
            if input_type != 'LineageProfiler':
                column_headers = t[
                    2]  ### exclude the ID, system code and p-value column headers
            x = 1
        else:
            if x == 1 and input_type != 'LineageProfiler':
                ### get system conversions
                ### The system code of the first data row decides the ID translation for the rows that follow
                system_code = t[1]
                import GO_Elite
                import OBO_import
                system_codes, source_types, mod_types = GO_Elite.getSourceData(
                )
                source_data = system_codes[system_code]
                if source_data == mod:
                    source_is_mod = True
                else:
                    source_is_mod = False
                    mod_source = mod + '-' + source_data + '.txt'
                    gene_to_source_id = gene_associations.getGeneToUid(
                        species_code, ('hide', mod_source))
                    source_to_gene = OBO_import.swapKeyValues(
                        gene_to_source_id)
            if input_type != 'LineageProfiler':
                if source_is_mod == True:
                    ### A missing or non-numeric third column raises here (no fallback value is stored)
                    id_db[t[0]] = float(t[2])
                elif t[0] in source_to_gene:
                    mod_ids = source_to_gene[t[0]]
                    for mod_id in mod_ids:
                        id_db[mod_id] = float(
                            t[2]
                        )  ### If multiple Ensembl IDs in dataset, only record the last associated fold change
            else:
                id_db[t[0]] = map(float, t[1:])  ### Applies to LineageProfiler
            x += 1
    return id_db, column_headers
Example #7
0
def exportSymbolRelationships(pathway_to_symbol,selected_species,pathway_type,type):
    """ Export gene-to-gene-set relationship files for each species from symbol-based gene sets.

    For every species (selected_species, or each entry of the database directory
    when selected_species is None) the symbols in pathway_to_symbol are looked up
    in the species' Ensembl-Symbol table after it has been passed through
    lowerSymbolDB, and the matching Ensembl genes are written to
    <database_dir>/<species>/gene-<type>/Ensembl-<pathway_type>.txt. When an
    Ensembl-EntrezGene table can be loaded, EntrezGene-<pathway_type>.txt is
    written alongside it. A type containing 'mapp' adds a system-code column
    (En for Ensembl, L for EntrezGene). Symbols that fail to export are skipped. """
    if selected_species is None:
        species_list = unique.read_directory('/'+database_dir)
    else:
        species_list = selected_species ### Restrict to selected species only

    for species in species_list:
        if '.' in species: continue ### entries containing a period are not processed
        gene_set_dir = database_dir+'/'+species+'/gene-'+type+'/'
        ens_data = export.ExportFile(gene_set_dir+'Ensembl-'+pathway_type+'.txt')

        ### The header and the column separators only depend on the export type
        if 'mapp' in type:
            header = 'GeneID\tSystem\tGeneSet\n'; ens_sep = '\tEn\t'; entrez_sep = '\tL\t'
        else:
            header = 'GeneID\tGeneSet\n'; ens_sep = '\t'; entrez_sep = '\t'
        ens_data.write(header)

        try: ens_to_entrez = gene_associations.getGeneToUid(species,('hide','Ensembl-EntrezGene'))
        except Exception: ens_to_entrez = {}
        if len(ens_to_entrez)>0:
            entrez_data = export.ExportFile(gene_set_dir+'EntrezGene-'+pathway_type+'.txt')
            entrez_data.write(header)

        #print 'Exporting '+pathway_type+' databases for:',species
        try: gene_to_source_id = gene_associations.getGeneToUid(species,('hide','Ensembl-Symbol'))
        except Exception: gene_to_source_id = {}
        source_to_gene = lowerSymbolDB(OBO_import.swapKeyValues(gene_to_source_id))

        for pathway in pathway_to_symbol:
            for symbol in pathway_to_symbol[pathway]:
                try:
                    for gene in source_to_gene[symbol]:
                        ens_data.write(gene+ens_sep+pathway+'\n')
                        if gene in ens_to_entrez:
                            for entrez in ens_to_entrez[gene]:
                                entrez_data.write(entrez+entrez_sep+pathway+'\n')
                except Exception: pass ### unknown symbol (or any other failure for this symbol): move on
        ens_data.close()
        try: entrez_data.close()
        except Exception: pass
Example #8
0
def justConvertFilenames(species,outputdir):
    """ Rename the SashimiPlot files in outputdir so that they begin with the gene symbol.

    Applies to entries whose name contains '.pdf' or '.png' and whose leading
    field (text before the first '__', ignoring those two substrings) is a key
    of the Ensembl-Symbol table. The new name is
    <symbol>-SashimiPlot_<second field of the old name>, the old name being
    split on '__', a backslash or '/' (first one present, in that order) or
    used whole when it contains none of them. A rename that fails is ignored. """
    import gene_associations
    gene_to_symbol = gene_associations.getGeneToUid(species,('hide','Ensembl-Symbol'))
    import OBO_import; symbol_to_gene = OBO_import.swapKeyValues(gene_to_symbol) ### not read below

    for filename in os.listdir(outputdir):
        is_plot = '.pdf' in filename or '.png' in filename
        if is_plot == False: continue
        gene_id = filename.replace('.pdf','').replace('.png','').split('__')[0]
        if gene_id in gene_to_symbol:
            tail = str(filename)
            ### Pick the delimiter used to extract the retained part of the name
            if '__' in filename: delimiter = '__'
            elif '\\' in filename: delimiter = '\\'
            elif '/' in filename: delimiter = '/'
            else: delimiter = None
            if delimiter != None: tail = filename.split(delimiter)[1]
            symbol_name = gene_to_symbol[gene_id][0]+'-SashimiPlot_'+tail
            old_path = os.path.join(outputdir, filename)
            new_path = os.path.join(outputdir, symbol_name)
            try: os.rename(old_path, new_path)
            except Exception: pass
Example #9
0
def sashmi_plot_list(bamdir,
                     eventsToVisualizeFilename,
                     PSIFilename,
                     events=None):
    """ Export SashimiPlots for a set of splicing events or genes.

    The events are read from eventsToVisualizeFilename unless an events list is
    supplied; in that case entries whose leading field (text before '__') is a
    known gene symbol are replaced, in place, by the matching Ensembl gene ID.
    Plots are requested through formatAndSubmitSplicingEventsToSashimiPlot in up
    to three passes: the PSI file, then steady_state_exp_file for the events
    returned by getMopUpEvents, and - only if that second pass processed
    anything - the PSI file once more for the events still returned by
    getMopUpEvents.

    Reads the names species, root_dir and steady_state_exp_file, none of which
    is defined inside this function. Returns the Ensembl-to-symbol dictionary.
    """
    import gene_associations
    gene_to_symbol = gene_associations.getGeneToUid(species,
                                                    ('hide', 'Ensembl-Symbol'))
    import OBO_import
    symbol_to_gene = OBO_import.swapKeyValues(gene_to_symbol)

    if events == None:
        splicing_events, expandedSearch = importSplicingEventsToVisualize(
            eventsToVisualizeFilename)
    else:
        ### Replace any ":" from the input events
        #for i in range(len(events)): events[i] = string.replace(events[i],':','__')
        expandedSearch = True

        ### NOTE(review): 'symbol' is assigned in this loop but never read afterwards in this function
        for i in range(len(events)):
            gene = string.split(events[i], '__')[0]
            if gene in gene_to_symbol:
                symbol = gene_to_symbol[gene][0]
            elif 'ENS' not in gene or 'G0000' in gene:
                if gene in symbol_to_gene:
                    ensID = symbol_to_gene[gene][0]
                    symbol = gene
                    events[
                        i] = ensID  ### translate this ID to an Ensembl gene ID for proper SashimiPlot lookup
        splicing_events = events  ### optionally get from supplied variable

    if len(splicing_events) == 0:
        print eventsToVisualizeFilename
        ### Bare reference to a name that is not defined in this function; unless it exists at module level
        ### this raises NameError - presumably a deliberate way to abort when there are no events (TODO confirm)
        forceNoCompatibleEventsInFile

    print 'Exporting plots',

    ### Determine Groups for Coloring
    groups_file = 'None'
    dir_list = unique.read_directory(root_dir + '/ExpressionInput')

    ### The last directory entry containing 'groups.' wins
    for file in dir_list:
        if 'groups.' in file:
            groups_file = root_dir + '/ExpressionInput/' + file

    ### NOTE(review): groups_file is always a string here (the default is the text 'None'), so this test is always true
    if groups_file != None:
        try:
            import ExpressionBuilder
            sample_group_db = ExpressionBuilder.simplerGroupImport(groups_file)
            groups = []
            for sample in sample_group_db:
                if sample_group_db[sample] not in groups:
                    groups.append(sample_group_db[sample]
                                  )  ### create an ordered list of unique group
        except Exception:
            ### NOTE(review): if the import above raised, sample_group_db is never bound and the call below fails with a NameError
            groups = ['None']
            #print traceback.format_exc()
            pass

    processed_events = formatAndSubmitSplicingEventsToSashimiPlot(
        PSIFilename, bamdir, splicing_events, sample_group_db, groups, False)
    mopup_events = getMopUpEvents(splicing_events, processed_events)

    ### Do the same for supplied gene queries or junctions that didn't map above using the gene expression values as a guide
    #print len(splicing_events),len(processed_events),len(mopup_events)
    processed_events = formatAndSubmitSplicingEventsToSashimiPlot(
        steady_state_exp_file, bamdir, mopup_events, sample_group_db, groups,
        expandedSearch)
    if len(processed_events) > 0:
        mopup_events = getMopUpEvents(mopup_events, processed_events)
        processed_events = formatAndSubmitSplicingEventsToSashimiPlot(
            PSIFilename, bamdir, mopup_events, sample_group_db, groups, True)
    return gene_to_symbol
Example #10
0
def reformatPolyAdenylationCoordinates(species,force):
    """ PolyA annotations are currently only available from UCSC for human, but flat file
    annotations from 2003-2006 are available for multiple species. Convert these to BED format"""
    version={}
    version['Rn'] = '2003(rn3)'
    version['Dr'] = '2003(zv4)'
    version['Gg'] = '2004(galGal2)'
    version['Hs'] = '2006(hg8)'
    version['Mm'] = '2004(mm5)'

    print 'Exporting polyADB_2 coordinates as BED for',species
    ### Obtain the necessary database files
    url = 'http://altanalyze.org/archiveDBs/all/polyAsite.txt'
    output_dir = 'AltDatabase/ucsc/'+species + '/'
    if force == 'yes':
        filename, status = update.download(url,output_dir,'')
    else: filename = output_dir+'polyAsite.txt'

    ### Import the refseq to Ensembl information
    import gene_associations; import OBO_import; import EnsemblImport; import export
    try:
        ens_unigene = gene_associations.getGeneToUid(species,'Ensembl-UniGene')
        print len(ens_unigene),'Ensembl-UniGene entries imported'
        external_ensembl = OBO_import.swapKeyValues(ens_unigene); use_entrez='no'
    except Exception:
        ens_entrez = gene_associations.getGeneToUid(species,'Ensembl-EntrezGene')
        print len(ens_entrez),'Ensembl-EntrezGene entries imported'
        external_ensembl = OBO_import.swapKeyValues(ens_entrez); use_entrez='yes'
    gene_location_db = EnsemblImport.getEnsemblGeneLocations(species,'RNASeq','key_by_array')
    
    export_bedfile = output_dir+species+'_polyADB_2_predictions.bed'
    print 'exporting',export_bedfile
    export_data = export.ExportFile(export_bedfile)
    header = '#'+species+'\t'+'polyADB_2'+'\t'+version[species]+'\n'
    export_data.write(header)
    
    fn=filepath(filename); x=0; not_found={}
    for line in open(fn,'rU').xreadlines():
        data = cleanUpLine(line)
        if x==0: x=1
        else:
            siteid,llid,chr,sitenum,position,supporting_EST,cleavage = string.split(data,'\t')
            if chr == 'chrM': chr = 'chrMT' ### MT is the Ensembl convention whereas M is the Affymetrix and UCSC convention
            if chr == 'M': chr = 'MT' ### MT is the Ensembl convention whereas M is the Affymetrix and UCSC convention
            if species in siteid:
                if 'NA' not in chr: chr = 'chr'+chr
                strand = '+'; geneid = siteid
                pos_start = str(int(position)-1); pos_end = position
                if use_entrez=='no':
                    external_geneid = string.join(string.split(siteid,'.')[:2],'.')
                else: external_geneid=llid
                if external_geneid in external_ensembl:
                    ens_geneid = external_ensembl[external_geneid][0]
                    geneid += '-'+ens_geneid
                    chr,strand,start,end = gene_location_db[ens_geneid]
                else:
                    not_found[external_geneid]=[]
                    bed_format = string.join([chr,pos_start,pos_end,geneid,'0','-'],'\t')+'\n' ### We don't know the strand, so write out both strands
                    export_data.write(bed_format)
                bed_format = string.join([chr,pos_start,pos_end,geneid,'0',strand],'\t')+'\n'
                export_data.write(bed_format)
    export_data.close()   
Example #11
0
def performGeneSetORA(geneset_dir):
    """ Perform over-representation analysis (ORA) on any provided Gene Set

    geneset_dir selects the gene-set resource; the value
    'UserSuppliedAssociations' loads custom associations instead of a MAPP
    table. Returns (0, None) when no gene-to-gene-set associations could be
    loaded, otherwise (1, mapp_to_mod_genes) after the results and gene
    associations have been exported.

    Besides its argument the function reads many names it does not define
    (for example species_code, mod, input_gene_list, denominator_gene_list,
    use_FET, permutations, permute_inputs, original_mapp_z_score_data,
    gene_file), so it depends on state prepared by the caller.
    """

    start_time = time.time()
    geneset_type = getResourceType(geneset_dir)
    #permuted_z_scores={}; original_mapp_z_score_data={}

    if geneset_type == 'Pathways': geneset_type = 'WikiPathways'
    ### Since MAPP tables can be provided by the user, allow the file to be missing
    if geneset_dir == 'UserSuppliedAssociations':
        gene_to_mapp = gene_associations.importGeneCustomData(
            species_code, system_codes, custom_sets_folder, mod)
        geneset_type = geneset_dir
    else:
        try:
            gene_to_mapp = gene_associations.importGeneMAPPData(
                species_code, geneset_dir)
        except Exception:
            gene_to_mapp = {}
    mapp_to_gene = OBO_import.swapKeyValues(gene_to_mapp)

    if len(gene_to_mapp) == 0:
        return 0, None
    else:
        ###Calculate primary z-scores for GeneSets
        mapp_to_mod_genes = getGenesInPathway(
            input_gene_list, gene_to_mapp)  ### For summary reporting
        mapp_input_gene_count, Rm, input_linked_mapp = countGenesInPathway(
            input_gene_list, gene_to_mapp, 'yes')
        mapp_denominator_gene_count, Nm, denom_linked_mapp = countGenesInPathway(
            denominator_gene_list, gene_to_mapp, 'yes')
        #print Nm,"unique genes, linked to GeneSets and in dataset and", Rm, "unique GeneSets\n linked genes matching criterion."
        ### original_mapp_z_score_data is not assigned in this function - presumably filled in by this call (TODO confirm)
        calculateZScores(mapp_input_gene_count, mapp_denominator_gene_count,
                         Nm, Rm, mapp_to_gene, 'MAPP')

        if use_FET == 'no':
            permute_mapp_inputs = []
            ###Begin GeneSets Permutation Analysis
            ### NOTE(review): if the division below fails, increment is never bound and the
            ### comparison inside the loop raises a NameError whenever permute_inputs is not empty
            try:
                original_increment = int(permutations / 10)
                increment = original_increment
            except Exception:
                null = None
            x = 0
            if permutations != 0: print '*',
            for permute_input_list in permute_inputs:
                ### Print a progress mark every original_increment permutations
                if x == increment:
                    increment += original_increment
                    print '*',
                x += 1
                permute_mapp_input_gene_count, null, null = countGenesInPathway(
                    permute_input_list, gene_to_mapp, 'no')
                permute_mapp_inputs.append(permute_mapp_input_gene_count)
            calculatePermuteZScores(permute_mapp_inputs,
                                    mapp_denominator_gene_count, Nm, Rm)
            calculatePermuteStats(original_mapp_z_score_data)
        adjustPermuteStats(original_mapp_z_score_data)

        mapp_headers = formatHeaders(gene_file, input_count, input_linked_mapp,
                                     denom_count, denom_linked_mapp, Rm, Nm,
                                     'MAPP', OBO_date)
        exportPathwayData(original_mapp_z_score_data, gene_file, mapp_headers,
                          geneset_type, 'local')

        ### Export all gene associations (added in version 1.21)
        exportPathwayToGeneAssociations(mapp_to_mod_genes, mod, gene_file,
                                        gene_annotations, geneset_type,
                                        'local')

        end_time = time.time()
        time_diff = formatTime(start_time, end_time)
        print "Initial results for %s calculated in %s seconds" % (
            geneset_type, time_diff)
        permute_mapp_inputs = []  ### rebinds the local name only

        return 1, mapp_to_mod_genes
Example #12
0
def reformatPolyAdenylationCoordinates(species, force):
    """ PolyA annotations are currently only available from UCSC for human, but flat file
    annotations from 2003-2006 are available for multiple species. Convert these to BED format"""
    version = {}
    version['Rn'] = '2003(rn3)'
    version['Dr'] = '2003(zv4)'
    version['Gg'] = '2004(galGal2)'
    version['Hs'] = '2006(hg8)'
    version['Mm'] = '2004(mm5)'

    print 'Exporting polyADB_2 coordinates as BED for', species
    ### Obtain the necessary database files
    url = 'http://altanalyze.org/archiveDBs/all/polyAsite.txt'
    output_dir = 'AltDatabase/ucsc/' + species + '/'
    if force == 'yes':
        filename, status = update.download(url, output_dir, '')
    else:
        filename = output_dir + 'polyAsite.txt'

    ### Import the refseq to Ensembl information
    import gene_associations
    import OBO_import
    import EnsemblImport
    import export
    try:
        ens_unigene = gene_associations.getGeneToUid(species,
                                                     'Ensembl-UniGene')
        print len(ens_unigene), 'Ensembl-UniGene entries imported'
        external_ensembl = OBO_import.swapKeyValues(ens_unigene)
        use_entrez = 'no'
    except Exception:
        ens_entrez = gene_associations.getGeneToUid(species,
                                                    'Ensembl-EntrezGene')
        print len(ens_entrez), 'Ensembl-EntrezGene entries imported'
        external_ensembl = OBO_import.swapKeyValues(ens_entrez)
        use_entrez = 'yes'
    gene_location_db = EnsemblImport.getEnsemblGeneLocations(
        species, 'RNASeq', 'key_by_array')

    export_bedfile = output_dir + species + '_polyADB_2_predictions.bed'
    print 'exporting', export_bedfile
    export_data = export.ExportFile(export_bedfile)
    header = '#' + species + '\t' + 'polyADB_2' + '\t' + version[species] + '\n'
    export_data.write(header)

    fn = filepath(filename)
    x = 0
    not_found = {}
    for line in open(fn, 'rU').xreadlines():
        data = cleanUpLine(line)
        if x == 0: x = 1
        else:
            siteid, llid, chr, sitenum, position, supporting_EST, cleavage = string.split(
                data, '\t')
            if chr == 'chrM':
                chr = 'chrMT'  ### MT is the Ensembl convention whereas M is the Affymetrix and UCSC convention
            if chr == 'M':
                chr = 'MT'  ### MT is the Ensembl convention whereas M is the Affymetrix and UCSC convention
            if species in siteid:
                if 'NA' not in chr: chr = 'chr' + chr
                strand = '+'
                geneid = siteid
                pos_start = str(int(position) - 1)
                pos_end = position
                if use_entrez == 'no':
                    external_geneid = string.join(
                        string.split(siteid, '.')[:2], '.')
                else:
                    external_geneid = llid
                if external_geneid in external_ensembl:
                    ens_geneid = external_ensembl[external_geneid][0]
                    geneid += '-' + ens_geneid
                    chr, strand, start, end = gene_location_db[ens_geneid]
                else:
                    not_found[external_geneid] = []
                    bed_format = string.join(
                        [chr, pos_start, pos_end, geneid, '0', '-'], '\t'
                    ) + '\n'  ### We don't know the strand, so write out both strands
                    export_data.write(bed_format)
                bed_format = string.join(
                    [chr, pos_start, pos_end, geneid, '0', strand],
                    '\t') + '\n'
                export_data.write(bed_format)
    export_data.close()
Example #13
0
def performOntologyORA(ontology_dir):
    """ Perform over-representation analysis (ORA) on any provided Ontology

    ontology_dir selects the ontology resource whose nested gene associations
    are loaded. Returns (0, None) when no gene-to-ontology associations could
    be loaded, otherwise (1, ontology_to_mod_genes) after the results and gene
    associations have been exported.

    Besides its argument the function reads many names it does not define
    (for example species_code, mod, input_gene_list, denominator_gene_list,
    use_FET, permutations, permute_inputs, original_ontology_z_score_data,
    gene_file), so it depends on state prepared by the caller.
    """

    start_time = time.time()
    ontology_type = getResourceType(ontology_dir)

    ######### Import Gene-to-Nested-Ontology #########
    gene_to_ontology = gene_associations.importGeneToOntologyData(
        species_code, mod, 'nested', ontology_type)
    ontology_to_gene = OBO_import.swapKeyValues(gene_to_ontology)
    if len(gene_to_ontology) == 0:
        return 0, None
    else:
        ######### Calculate primary z-scores for GO terms
        ontology_to_mod_genes = getGenesInPathway(
            input_gene_list, gene_to_ontology)  ### For summary gene reporting
        ontology_input_gene_count, Rg, input_linked_ontology = countGenesInPathway(
            input_gene_list, gene_to_ontology, 'yes')
        ontology_denominator_gene_count, Ng, denom_linked_ontology = countGenesInPathway(
            denominator_gene_list, gene_to_ontology, 'yes')

        #print Ng,"unique genes, linked to GO and in dataset and", Rg, "unique GO linked genes matching criterion."
        ### original_ontology_z_score_data is not assigned in this function - presumably filled in by this call (TODO confirm)
        calculateZScores(ontology_input_gene_count,
                         ontology_denominator_gene_count, Ng, Rg,
                         ontology_to_gene, 'Ontology')

        if use_FET == 'no':
            ###Beginning Ontology Permutation Analysis
            ### NOTE(review): if the division below fails, increment is never bound and the
            ### comparison inside the loop raises a NameError whenever permute_inputs is not empty
            try:
                original_increment = int(permutations / 10)
                increment = original_increment
            except Exception:
                null = None
            x = 0
            permute_ontology_inputs = []
            if permutations != 0: print '*',
            for permute_input_list in permute_inputs:
                ### http://docs.python.org/library/multiprocessing.html
                ### Print a progress mark every original_increment permutations
                if x == increment:
                    increment += original_increment
                    print '*',
                x += 1
                permute_ontology_input_gene_count, null, null = countGenesInPathway(
                    permute_input_list, gene_to_ontology, 'no')
                permute_input_list = []  ### rebinds the loop variable only; permute_inputs itself is unchanged
                permute_ontology_inputs.append(
                    permute_ontology_input_gene_count)
            #if permutations !=0: print 'Gene Ontology finished'
            calculatePermuteZScores(permute_ontology_inputs,
                                    ontology_denominator_gene_count, Ng, Rg)
            calculatePermuteStats(original_ontology_z_score_data)
        adjustPermuteStats(original_ontology_z_score_data)
        go_headers = formatHeaders(gene_file, input_count,
                                   input_linked_ontology, denom_count,
                                   denom_linked_ontology, Rg, Ng, 'Ontology',
                                   OBO_date)
        exportPathwayData(original_ontology_z_score_data, gene_file,
                          go_headers, ontology_type, 'Ontology')

        ### Export all gene associations (added in version 1.21)
        exportPathwayToGeneAssociations(ontology_to_mod_genes, mod, gene_file,
                                        gene_annotations, ontology_type,
                                        'Ontology')
        end_time = time.time()
        time_diff = formatTime(start_time, end_time)
        print "Initial results for %s calculated in %s seconds" % (
            ontology_type, time_diff)
        permute_ontology_inputs = []  ### rebinds the local name only
        return 1, ontology_to_mod_genes
Example #14
0
def importGeneExpressionValues(filename,tissue_specific_db,translation_db,useLog=False,previouslyRun=False,species=None):
    ### Import gene-level expression raw values           
    fn=filepath(filename); x=0; genes_added={}; gene_expression_db={}
    dataset_name = export.findFilename(filename)
    max_val=0
    print 'importing:',dataset_name
    
    try:
        import gene_associations, OBO_import
        gene_to_symbol = gene_associations.getGeneToUid(species,('hide','Ensembl-Symbol'))
        symbol_to_gene = OBO_import.swapKeyValues(gene_to_symbol)
    except Exception: symbol_to_gene={}
    
    for line in open(fn,'rU').xreadlines():
        data = cleanUpLine(line)
        t = string.split(data,'\t')
        
        if x==0:
            if '#' not in data:
                for i in t[1:]: sample_headers.append(i)
                x=1
        else:
            gene = t[0]
            try: gene = string.split(t[0],'|')[0]
            except Exception: pass
            #if '-' not in gene and ':E' in gene: print gene;sys.exit()
            if analysis_type == 'AltExon':
                try: ens_gene,exon = string.split(gene,'-')[:2]
                except Exception: exon = gene
                gene = exon
            if keyed_by == 'translation': ### alternative value is 'primaryID'
                """if gene == 'ENSMUSG00000025915-E19.3':
                    for i in translation_db: print [i], len(translation_db); break
                    print gene, [translation_db[gene]];sys.exit()"""
                try: gene = translation_db[gene] ### Ensembl annotations
                except Exception: pass
            try: gene = symbol_to_gene[gene][0] ### If RNASeq is the selected platform and Symbol is the uid
            except Exception: pass
            if gene in tissue_specific_db:
                index,tissue_exp=tissue_specific_db[gene]
                try: genes_added[gene]+=1
                except Exception: genes_added[gene]=1
                proceed=True
                try:
                    exp_vals = t[1:]
                    if '' in exp_vals:
                        ### If missing values present (PSI values)
                        exp_vals = ['0.000101' if i=='' else i for i in exp_vals]
                        useLog = False
                    exp_vals = map(float, exp_vals)
                    if platform == 'RNASeq':
                        if max(exp_vals)>max_val: max_val = max(exp_vals)
                        #if max(exp_vals)<3: proceed=False
                        if useLog==False:
                            exp_vals = map(lambda x: math.log(x+1,2),exp_vals)
                    if value_type == 'calls': ### Hence, this is a DABG or RNA-Seq expression
                        exp_vals = produceDetectionCalls(exp_vals,targetPlatform) ### 0 or 1 calls
                    if proceed:
                        gene_expression_db[gene] = [index,exp_vals]
                except Exception:
                    print 'Non-numeric values detected:'
                    x = 5
                    print t[:x]
                    while x < t:
                        t[x:x+5]
                        x+=5
                    print 'Formatting error encountered in:',dataset_name; forceError
            """else:
                for gene in tissue_specific_db:
                    if 'Ndufa9:ENSMUSG00000000399:I2.1-E3.1' in gene:
                        print gene, 'dog';sys.exit()
                print gene;kill"""
        
    print len(gene_expression_db), 'matching genes in the dataset and tissue compendium database'
    
    for gene in genes_added:
        if genes_added[gene]>1:
            del gene_expression_db[gene] ### delete entries that are present in the input set multiple times (not trustworthy)
        else: expession_subset.append(gene_expression_db[gene]) ### These contain the rank order and expression
    #print len(expession_subset);sys.exit()
    expession_subset.sort() ### This order now matches that of 
    gene_expression_db=[]
    
    if max_val<20 and platform == 'RNASeq' and previouslyRun==False: ### Only allow to happen once
        importGeneExpressionValues(filename,tissue_specific_db,translation_db,useLog=True,previouslyRun=True,species=species)
Example #15
0
def importGeneExpressionValues(filename,tissue_specific_db,translation_db,useLog=False,previouslyRun=False,species=None):
    ### Import gene-level expression raw values           
    fn=filepath(filename); x=0; genes_added={}; gene_expression_db={}
    dataset_name = export.findFilename(filename)
    max_val=0
    print 'importing:',dataset_name
    
    try:
        import gene_associations, OBO_import
        gene_to_symbol = gene_associations.getGeneToUid(species,('hide','Ensembl-Symbol'))
        symbol_to_gene = OBO_import.swapKeyValues(gene_to_symbol)
    except Exception: symbol_to_gene={}
    
    for line in open(fn,'rU').xreadlines():
        data = cleanUpLine(line)
        t = string.split(data,'\t')
        
        if x==0:
            if '#' not in data:
                for i in t[1:]: sample_headers.append(i)
                x=1
        else:
            gene = t[0]
            #if '-' not in gene and ':E' in gene: print gene;sys.exit()
            if analysis_type == 'AltExon':
                try: ens_gene,exon = string.split(gene,'-')[:2]
                except Exception: exon = gene
                gene = exon
            if keyed_by == 'translation': ### alternative value is 'primaryID'
                """if gene == 'ENSMUSG00000025915-E19.3':
                    for i in translation_db: print [i], len(translation_db); break
                    print gene, [translation_db[gene]];sys.exit()"""
                try: gene = translation_db[gene] ### Ensembl annotations
                except Exception: pass
            try: gene = symbol_to_gene[gene][0] ### If RNASeq is the selected platform and Symbol is the uid
            except Exception: pass
            if gene in tissue_specific_db:
                index,tissue_exp=tissue_specific_db[gene]
                try: genes_added[gene]+=1
                except Exception: genes_added[gene]=1
                proceed=True
                try:
                    exp_vals = map(float, t[1:])
                    if platform == 'RNASeq':
                        if max(exp_vals)>max_val: max_val = max(exp_vals)
                        #if max(exp_vals)<3: proceed=False
                        if useLog==False:
                            exp_vals = map(lambda x: math.log(x+1,2),exp_vals)
                    if value_type == 'calls': ### Hence, this is a DABG or RNA-Seq expression
                        exp_vals = produceDetectionCalls(exp_vals,targetPlatform) ### 0 or 1 calls
                    if proceed:
                        gene_expression_db[gene] = [index,exp_vals]
                except Exception:
                    print 'Non-numeric values detected:'
                    x = 5
                    print t[:x]
                    while x < t:
                        t[x:x+5]
                        x+=5
                    print 'Formatting error encountered in:',dataset_name; forceError

    print len(gene_expression_db), 'matching genes in the dataset and tissue compendium database'
    
    for gene in genes_added:
        if genes_added[gene]>1: del gene_expression_db[gene] ### delete entries that are present in the input set multiple times (not trustworthy)
        else: expession_subset.append(gene_expression_db[gene]) ### These contain the rank order and expression
    #print len(expession_subset);sys.exit()
    expession_subset.sort() ### This order now matches that of 
    gene_expression_db=[]
    
    if max_val<20 and platform == 'RNASeq' and previouslyRun==False: ### Only allow to happen once
        importGeneExpressionValues(filename,tissue_specific_db,translation_db,useLog=True,previouslyRun=True,species=species)
def importDataSimple(filename, input_type, MOD=None, Species=None):
    """Import IDs and their numeric values from a tab-delimited file.

    Lines starting with '#' that precede the header are skipped, as are blank
    lines anywhere in the file. The first remaining line is the header.

    input_type == 'LineageProfiler':
        every data row is stored as id_db[first column] = list of floats built
        from the remaining columns; column_headers is the header minus its
        first field.
    any other input_type:
        the second column of the first data row is a system code that is
        looked up through GO_Elite.getSourceData(). When the resulting source
        system equals the MOD, id_db[first column] = float(third column);
        otherwise the ID is translated through a MOD-source relationship table
        and the value is stored under the first translated MOD ID only. Values
        '+' and '-' are stored as 1.0 and -1.0; missing or non-numeric values
        are stored as 'NA'. column_headers is the third header field (a
        string), or 'NA' when the header has fewer than three fields.

    MOD and Species are only used when the names mod and species_code are not
    defined outside this function.

    Returns (id_db, column_headers).

    NOTE(review): when the file contains no header line, column_headers is
    never assigned and the return statement raises; this matches the previous
    behaviour.
    """
    id_db = {}
    fn = filepath(filename)
    x = 0  ### 0 until the header has been read; afterwards 1 + number of data rows read
    input_file = open(fn, 'rU')
    try:
        for line in input_file:
            data = cleanUpLine(line)
            if len(data) == 0:
                ### Blank lines previously raised an IndexError on data[0]
                continue
            t = string.split(data, '\t')
            if x == 0 and data[0] == '#':
                continue
            if x == 0:
                column_headers = t[1:]
                if input_type != 'LineageProfiler':
                    try:
                        column_headers = t[2]  ### exclude the ID, system code and p-value column headers
                    except IndexError:
                        column_headers = 'NA'
                x = 1
                continue

            if input_type == 'LineageProfiler':
                id_db[t[0]] = [float(value) for value in t[1:]]
                x += 1
                continue

            if x == 1:
                ### get system conversions (first data row only)
                system_code = t[1]
                import GO_Elite
                import OBO_import
                system_codes, source_types, mod_types = GO_Elite.getSourceData()
                source_data = system_codes[system_code]
                try:
                    Mod = mod  ### global established in upstream functions
                    speciescode = species_code
                except NameError:
                    Mod = MOD
                    speciescode = Species
                source_is_mod = (source_data == Mod)
                if not source_is_mod:
                    mod_source = Mod + '-' + source_data + '.txt'
                    gene_to_source_id = gene_associations.getGeneToUid(
                        speciescode, ('hide', mod_source))
                    source_to_gene = OBO_import.swapKeyValues(gene_to_source_id)

            if source_is_mod:
                try:
                    id_db[t[0]] = float(t[2])
                except Exception:
                    id_db[t[0]] = 'NA'
            elif t[0] in source_to_gene:
                for mod_id in source_to_gene[t[0]]:
                    try:
                        value = t[2]
                    except IndexError:
                        value = 'NA'
                    if value == '+': value = 1
                    elif value == '-': value = -1
                    try:
                        ### If multiple Ensembl IDs in dataset, only record the last associated fold change
                        id_db[mod_id] = float(value)
                    except Exception:
                        id_db[mod_id] = 'NA'
                    break  ### only the first translated MOD ID receives the value
            x += 1
    finally:
        input_file.close()
    return id_db, column_headers
Example #17
0
def generateMAPPFinderScores(species_title, species_id, source, mod_db,
                             system_Codes, permute, resources_to_analyze,
                             file_dirs, parent_root):
    global mappfinder_output_dir
    global custom_sets_folder
    global root
    root = parent_root
    global mapp_to_mod_genes
    global ontology_to_mod_genes
    global system_codes
    system_codes = system_Codes
    criterion_input_folder, criterion_denom_folder, output_dir, custom_sets_folder = file_dirs
    previous_denominator_file_dir = ''
    ontology_to_mod_genes = {}
    mapp_to_mod_genes = {}
    global test
    test = 'no'
    program_type, database_dir = unique.whatProgramIsThis()
    if resources_to_analyze == 'Gene Ontology':
        resources_to_analyze = 'GeneOntology'

    if len(output_dir) == 0: mappfinder_output_dir = 'input/MAPPFinder'
    else:
        mappfinder_output_dir = output_dir + '/GO-Elite_results/CompleteResults/ORA'

    global source_data
    source_data = source
    global mod
    mod = mod_db
    global species_code
    species_code = species_id
    global species_name
    species_name = species_title
    global gene_to_mapp
    global permutations
    permutations = permute
    global eliminate_redundant_genes
    eliminate_redundant_genes = 'yes'
    global permuted_z_scores
    global ontology_annotations
    global original_ontology_z_score_data
    global original_mapp_z_score_data
    global input_gene_list
    global denominator_gene_list
    global gene_file
    global denom_file_status
    global input_count
    global denom_count
    global gene_annotations
    global source_to_gene
    global use_FET
    if permutations == "FisherExactTest":
        use_FET = 'yes'  ### Use Fisher's Exact test instead of permutation-based p-values
        permutations = 0
    else:
        use_FET = 'no'

    start_time = time.time()

    gene_annotations = gene_associations.importGeneData(species_code, mod)

    OBO_date = importVersionData('OBO/')
    if len(criterion_input_folder) == 0:
        import_dir = '/input/GenesToQuery/' + species_code
        import_dir_alt = import_dir[1:]
    else:
        import_dir = criterion_input_folder
        import_dir_alt = criterion_input_folder
    m = GrabFiles()
    m.setdirectory(import_dir)
    try:
        dir_list = readDirText(
            import_dir
        )  #send a sub_directory to a function to identify all files in a directory
    except Exception:
        print_out = 'Warning! Input directory location is not a valid folder. Exiting GO-Elite.'
        ForceCriticalError(print_out)
    try:
        denom_dir_list = readDirText(criterion_denom_folder)
    except Exception:
        print_out = 'Warning! Denominator directory location is not a valid folder. Exiting GO-Elite.'
        ForceCriticalError(print_out)
    if len(dir_list) == 0:
        error_message = 'No files with the extension ".txt" found in the input directory.'
        ForceCriticalError(error_message)
    if len(denom_dir_list) == 0:
        error_message = 'No files with the extension ".txt" found in the denominator directory.'
        ForceCriticalError(error_message)

    inputs_analyzed = 0
    for mappfinder_input in dir_list:  #loop through each file in the directory
        permuted_z_scores = {}
        original_ontology_z_score_data = {}
        original_mapp_z_score_data = {}
        print 'Performing over-representation analysis (ORA) on', mappfinder_input
        gene_file_dir, gene_file = m.searchdirectory(mappfinder_input)
        ###Import Input gene/source-id lists
        input_gene_list, source_data_input, error_message = gene_associations.importUIDsForMAPPFinderQuery(
            import_dir_alt + '/' + gene_file, system_codes, 'no')
        input_count = len(input_gene_list)
        if 'WARNING!!!' in error_message:  ### Warn the user about SwissProt issues when importing the denominator
            ForceCriticalError(error_message)
        if len(criterion_denom_folder) == 0:
            denom_folder = '/input/GenesToQuery/' + species_code + '/DenominatorGenes'
        else:
            denom_folder = criterion_denom_folder
        error_warning = "\nThe directory\n" + '[' + denom_folder + ']' + "\nwas not found. Please create the directory\nand place an appropriate denominator file\nor files in it."
        denominator_file_dir = identifyGeneFiles(
            denom_folder,
            gene_file)  ###input is in input\Genes, denominator in
        try:
            denominator_file_dir = identifyGeneFiles(
                denom_folder,
                gene_file)  ###input is in input\Genes, denominator in
            denominator_file = string.split(denominator_file_dir, '/')[-1]
            print 'Using:', denominator_file, 'for the denominator.'
        except Exception:
            print_out = "WARNING: No denominator file included in\nthe Denominator directory.\nTo proceed, place all denominator\nIDs in a file in that directory."
            ForceCriticalError(print_out)
        if denominator_file_dir == previous_denominator_file_dir:
            denom_file_status = 'old'
        else:
            denom_file_status = 'new'
        if denom_file_status == 'new':
            previous_denominator_file_dir = denominator_file_dir
            denominator_gene_list, source_data_denom, error_message = gene_associations.importUIDsForMAPPFinderQuery(
                denominator_file_dir, system_codes, 'no')
            denom_count = len(denominator_gene_list)
            if 'SwissProt' in error_message and 'WARNING!!!' not in error_message:
                if len(input_gene_list) == 0:
                    error_message += '\nNo valid input IDs found. Exiting GO-Elite.'
                    try:
                        UI.WarningWindow(
                            error_message, 'Warning!!! Identifier Error'
                        )  ### Only warn, don't force an exit (if SwissProt full IDs are present)
                    except Exception:
                        None
                    sys.exit()
                else:
                    try:
                        UI.WarningWindow(
                            error_message, 'Warning!!! Identifier Error'
                        )  ### Only warn, don't force an exit (if SwissProt full IDs are present)
                    except Exception:
                        None
            elif len(error_message) > 0:
                ForceCriticalError(error_message)
            if len(denominator_gene_list) == len(input_gene_list):
                print_out = 'Input and Denominator lists have identical counts.\nPlease load a propper denominator set (containing\nthe input list with all assayed gene IDs) before proceeding.'
                ForceCriticalError(print_out)
            original_denominator_gene_list = []
            for id in denominator_gene_list:
                original_denominator_gene_list.append(
                    id
                )  ###need this to be a valid list not dictionary for permutation analysis
        if len(source_data_input) > 0:
            source_data = source_data_input  ###over-ride source_data if a source was identified from the input file
        if source_data != mod:
            if denom_file_status == 'new':
                mod_source = mod + '-' + source_data + '.txt'
                #checkDenominatorMatchesInput(input_gene_list,denominator_gene_list,gene_file) ###This is checked for the source IDs not associated MOD IDs
                try:
                    gene_to_source_id = gene_associations.getGeneToUid(
                        species_code, mod_source)
                    print mod_source, 'imported'
                except Exception:
                    try:
                        if mod == 'EntrezGene': mod = 'Ensembl'
                        else: mod = 'EntrezGene'
                        print 'The primary system (MOD) has been switched from', mod_db, 'to', mod, '\n(' + mod_db, 'not supported for the %s ID system).' % source_data
                        mod_source = mod + '-' + source_data + '.txt'
                        gene_to_source_id = gene_associations.getGeneToUid(
                            species_code, mod_source)
                    except Exception:
                        print_out = "WARNING: The primary gene ID system '" + mod + "'\ndoes not support relationships with '" + source_data + "'.\nRe-run using a supported primary ID system."
                        ForceCriticalError(print_out)
                source_to_gene = OBO_import.swapKeyValues(gene_to_source_id)
                denominator_gene_list = associateInputSourceWithGene(
                    source_to_gene, denominator_gene_list)
                ### Introduced the below method in version 1.21 to improve permutation speed (no longer need to search all source IDs)
                ### Only includes source ID to gene relationships represented in the denominator file (needed for Affymetrix)
                source_to_gene = OBO_import.swapKeyValues(
                    denominator_gene_list)
            ###Replace input lists with corresponding MOD IDs
            input_gene_list = associateInputSourceWithGene(
                source_to_gene, input_gene_list)
        checkDenominatorMatchesInput(
            input_gene_list, denominator_gene_list,
            gene_file)  ###This is for only the associated MOD IDs

        gd = GrabFiles()
        gd.setdirectory('/' + database_dir + '/' + species_code + '/gene-mapp')
        available_genesets = reorganizeResourceList(gd.getAllFiles(mod))
        od = GrabFiles()
        od.setdirectory('/' + database_dir + '/' + species_code + '/gene-go')
        available_ontologies = reorganizeResourceList(od.getAllFiles(mod))

        input_gene_count = len(
            input_gene_list
        )  ###Count number of genes associated with source input IDs
        if len(input_gene_list) == 0 or len(denominator_gene_list) == 0:
            if len(input_gene_list) == 0:
                print_out = 'WARNING!!!! None of the input IDs provided map to genes for ' + mappfinder_input + '. Check to make sure the selected species is correct.'
                print_out += '\nSelected species: ' + species_name
                print_out += '\nInput ID system: ' + str(source_data_input)
                print_out += '\nPrimary ID system (MOD): ' + str(mod)
                ForceCriticalError(print_out)
            if len(denominator_gene_list) == 0:
                print_out = 'WARNING!!!! None of the denominator IDs provided map to genes for ' + denominator_file_dir + '. Check to make sure the selected species is correct.'
                print_out += '\nSelected species: ' + species_name
                print_out += '\nDenominator ID system: ' + str(source)
                print_out += '\nPrimary ID system (MOD):' + str(mod)
                ForceCriticalError(print_out)
        elif len(available_ontologies) == 0 and len(available_genesets) == 0:
            print_out = 'WARNING!!!! No Ontology or GeneSets appear to be available for this species. Please supply and re-analyze.'
            ForceCriticalError(print_out)
        else:
            """ Perform permutation analysis and ORA on available GeneSets or Ontologies"""
            inputs_analyzed += 1

            global permute_inputs
            permute_inputs = []
            if permutations != 0 or use_FET == 'no':
                buildPermutationDatabase(original_denominator_gene_list,
                                         input_count)

            run_status = 0
            ### Analyzed ontologies
            if len(available_ontologies) > 0:
                print '    Analyzing input ID list with available ontologies'
            for ontology_dir in available_ontologies:
                ontology_type = getResourceType(ontology_dir)
                permuted_z_scores = {}
                original_ontology_z_score_data = {}
                #print ontology_type, resources_to_analyze
                if resources_to_analyze == ontology_type or resources_to_analyze == 'all':
                    ontology_annotations = importOntologyAnnotations(
                        species_code, ontology_type)
                    if ontology_annotations != None:  ### Occurs when the files are named or formatted correctly
                        status, ontology_to_mod_genes = performOntologyORA(
                            ontology_dir)
                        run_status += status

            ### Analyzed gene-sets
            if len(available_genesets) > 0:
                print '    Analyzing input ID list with available gene-sets'
            for geneset_dir in available_genesets:
                geneset_type = getResourceType(geneset_dir)
                permuted_z_scores = {}
                original_mapp_z_score_data = {}
                if resources_to_analyze == geneset_type or resources_to_analyze == 'all':
                    status, mapp_to_mod_genes = performGeneSetORA(geneset_dir)
                    run_status += status
            if len(custom_sets_folder) > 0:
                ### Hence - Analyze User Supplied GeneSets
                permuted_z_scores = {}
                original_mapp_z_score_data = {}
                run_status += performGeneSetORA('UserSuppliedAssociations')[0]

            permute_inputs = []
            permute_mapp_inputs = []
            ontology_input_gene_count = []
            mapp_input_gene_count = []

            if run_status == 0:
                ### Returns the number of successfully analyzed gene-set databases
                program_type, database_dir = unique.whatProgramIsThis()
                print_out = "Warning!!! Either the MOD you have selected: " + mod + "\nis missing the appropriate relationshipfiles necessary to run GO-Elite\nor you have selected an invalid resource to analyze.  Either replace\nthe missing MOD files in " + database_dir + '/' + species_code + ' sub-directories or\nselect a different MOD at run-time.'
                ForceCriticalError(print_out)

    end_time = time.time()
    time_diff = formatTime(start_time, end_time)
    print 'ORA analyses finished in %s seconds' % time_diff
    return ontology_to_mod_genes, mapp_to_mod_genes  ###Return the MOD genes associated with each GO term and MAPP