Esempio n. 1
0
def exportChromosomeStrandCoordinates(species):
    """Export a sorted, tab-delimited table of gene coordinates for a species.

    One row is written per gene to
    ``GenMAPPDBs/<species>/chr_gene_locations.txt`` with the columns:
    gene ID, symbol, chromosome, strand, start, end, biotype.

    Only genes whose chromosome name is shorter than 7 characters are
    exported (presumably to drop unplaced scaffolds/contigs -- confirm against
    the naming used by the location database). Rows are ordered by
    chromosome (string order), strand, start, end.

    Args:
        species: species code used to build the database and output paths.
    """
    import EnsemblImport
    gene_location_db = EnsemblImport.getEnsemblGeneLocations(
        species, 'RNASeq', 'key_by_array')

    import ExpressionBuilder
    gene_biotype_db = ExpressionBuilder.importTranscriptBiotypeAnnotations(
        species)
    export_path = 'GenMAPPDBs/' + species + '/chr_gene_locations.txt'
    export_data = export.ExportFile(export_path)

    # Everything after the file is opened runs under try/finally so the handle
    # is released even if an import or a coordinate conversion fails.
    try:
        import ExonAnalyze_module
        gene_annotation_file = ("AltDatabase/ensembl/" + species + "/" +
                                species + "_Ensembl-annotations.txt")
        annotate_db = ExonAnalyze_module.import_annotations(
            gene_annotation_file, 'RNASeq')

        print('Annotations for %s genes imported' % len(gene_location_db))

        rows = []
        protein_coding = 0
        for gene in gene_location_db:
            chrom, strand, start, end = gene_location_db[gene]
            if gene in gene_biotype_db:
                # The biotype is the last element of the stored entry.
                biotype = gene_biotype_db[gene][-1]
                if biotype == 'protein_coding':
                    # Counted before the chromosome filter below, so this
                    # total covers every gene in the location database.
                    protein_coding += 1
            else:
                biotype = 'NA'
            if len(chrom) < 7:
                # Coordinates are stored as ints so the sort is numeric.
                rows.append(
                    (chrom, strand, int(start), int(end), gene, biotype))

        print('%s genes for typical chromosomes present' % len(rows))
        print('%s protein coding genes present' % protein_coding)

        rows.sort()
        for chrom, strand, start, end, gene, biotype in rows:
            try:
                symbol = annotate_db[gene].Symbol()
            except Exception:
                # Best-effort: genes without a usable annotation get an
                # empty symbol column rather than aborting the export.
                symbol = ''
            fields = [gene, symbol, chrom, strand,
                      str(start), str(end), biotype]
            export_data.write('\t'.join(fields) + '\n')
    finally:
        export_data.close()

    # No space after the newline: this matches what the Python 2 print
    # statement emitted for the original comma-separated arguments.
    print('%s chromosome locations exported to:\n%s' % (species, export_path))
Esempio n. 2
0
def exportChromosomeStrandCoordinates(species):
    """Write gene chromosome/strand coordinates for ``species`` to a text file.

    The output file is ``GenMAPPDBs/<species>/chr_gene_locations.txt``; each
    line holds gene ID, symbol, chromosome, strand, start, end and biotype,
    separated by tabs. Genes are skipped when their chromosome name has 7 or
    more characters (presumably non-standard contigs -- confirm), and the
    remaining rows are sorted by chromosome (as text), strand, start and end.

    Args:
        species: species code used in the input database and output paths.
    """
    import EnsemblImport
    gene_location_db = EnsemblImport.getEnsemblGeneLocations(species, 'RNASeq', 'key_by_array')

    import ExpressionBuilder
    gene_biotype_db = ExpressionBuilder.importTranscriptBiotypeAnnotations(species)
    export_path = 'GenMAPPDBs/' + species + '/chr_gene_locations.txt'
    export_data = export.ExportFile(export_path)

    # The handle is open from here on; make sure it is closed on every path.
    try:
        import ExonAnalyze_module
        gene_annotation_file = "AltDatabase/ensembl/" + species + "/" + species + "_Ensembl-annotations.txt"
        annotate_db = ExonAnalyze_module.import_annotations(gene_annotation_file, 'RNASeq')

        print('Annotations for %s genes imported' % len(gene_location_db))

        located_genes = []
        protein_coding = 0
        for gene in gene_location_db:
            chromosome, strand, start, end = gene_location_db[gene]
            biotype = 'NA'
            if gene in gene_biotype_db:
                # Last element of the biotype entry is the biotype itself.
                biotype = gene_biotype_db[gene][-1]
                if biotype == 'protein_coding':
                    # Tallied for all genes, including ones filtered out below.
                    protein_coding += 1
            if len(chromosome) < 7:
                # int() so that positions sort numerically, not as text.
                located_genes.append((chromosome, strand, int(start), int(end), gene, biotype))

        print('%s genes for typical chromosomes present' % len(located_genes))
        print('%s protein coding genes present' % protein_coding)

        located_genes.sort()
        for chromosome, strand, start, end, gene, biotype in located_genes:
            try:
                symbol = annotate_db[gene].Symbol()
            except Exception:
                # Deliberate fallback: a missing annotation yields a blank symbol.
                symbol = ''
            columns = [gene, symbol, chromosome, strand, str(start), str(end), biotype]
            export_data.write('\t'.join(columns) + '\n')
    finally:
        export_data.close()

    # The Python 2 print statement wrote no separator after an item ending in
    # a newline, so the path starts directly at the beginning of the line.
    print('%s chromosome locations exported to:\n%s' % (species, export_path))
Esempio n. 3
0
def displayExpressionGraph(species,
                           Platform,
                           exp_file,
                           gene,
                           transpose,
                           display=True,
                           showIntrons=False,
                           analysisType='plot'):
    """Prepare exon-level data for the requested gene(s) and hand it to
    ``importTableEntries``.

    Each requested gene is resolved to a gene ID / symbol pair, the
    exon (RNASeq) or probeset (other platforms) definitions for those genes
    are loaded, and ``importTableEntries`` is called with an ``ExonPlots/``
    output folder located next to the results.

    Args:
        species: species code used to build the annotation file paths.
        Platform: platform name; it is also stored in the module-level
            global ``platform``.
        exp_file: expression file path. The part before ``AltResults`` (or,
            failing that, ``ExpressionInput``) is used as the results root.
        gene: one or more gene identifiers separated by ``,``, ``|`` or
            spaces. Tokens containing ``ENS`` are looked up as gene IDs, all
            others as gene symbols; both kinds may be mixed in one request.
        transpose, display, showIntrons, analysisType: forwarded unchanged
            to ``importTableEntries``.

    Raises:
        NameError: if none of the requested genes could be resolved (the
            exception type is kept from the original implementation).
    """
    ### Get gene annotations (users can provide an Ensembl or symbol)
    print('Importing exon-level expression data for visualization (be patient)...')
    import ExonAnalyze_module
    global platform
    platform = Platform
    if platform != 'AltMouse':
        gene_annotation_file = "AltDatabase/ensembl/" + species + "/" + species + "_Ensembl-annotations.txt"
    else:
        gene_annotation_file = "AltDatabase/" + species + "/" + platform + "/" + platform + "_gene_annotations.txt"

    # Normalise every supported separator to ',' and drop the empty tokens
    # that repeated separators (e.g. "A, B") would otherwise produce.
    normalized = gene.replace('|', ',').replace(' ', ',')
    queries = [token for token in normalized.split(',') if token]

    # One annotation table per key type, loaded on first use. A table keyed
    # by gene ID cannot answer symbol lookups (and vice versa), so a request
    # mixing both kinds needs both tables.
    annotation_dbs = {}
    gene_db = {}
    for query in queries:
        try:
            key_by_symbol = 'ENS' not in query
            if key_by_symbol not in annotation_dbs:
                annotation_dbs[key_by_symbol] = ExonAnalyze_module.import_annotations(
                    gene_annotation_file, platform,
                    keyBySymbol=key_by_symbol)  ### Make an SQLite call
            annotate_db = annotation_dbs[key_by_symbol]
            if key_by_symbol:
                gene_symbol = query
                gene_id = annotate_db[query].GeneID()
            else:
                gene_id = query
                gene_symbol = annotate_db[query].Symbol()
            gene_db[gene_id] = gene_symbol
        except Exception:
            # Unknown identifiers are reported and skipped, not fatal.
            print('%s not in database' % (query,))

    if len(gene_db) == 0:
        # The original aborted here by evaluating an undefined name; raise
        # the same exception type explicitly so existing handlers still work.
        raise NameError(
            'None of the requested genes were found in the annotation database')

    if 'AltResults' in exp_file:
        root_dir = exp_file.split('AltResults')[0] + 'ExonPlots/'
    else:
        root_dir = exp_file.split('ExpressionInput')[0] + 'ExonPlots/'

    if platform == 'RNASeq':
        datatype = 'exons'
    else:
        datatype = 'probesets'
    export_exon_filename = 'AltDatabase/' + species + '/' + platform + '/' + species + '_Ensembl_' + datatype + '.txt'
    if verifyFileLength(export_exon_filename) == 0:
        # Fall back to a copy of the database stored under the results root.
        rootdir = root_dir.replace('ExonPlots/', '')
        export_exon_filename = rootdir + '/' + export_exon_filename

    import ExonArrayEnsemblRules
    ensembl_exon_db = ExonArrayEnsemblRules.reimportEnsemblProbesetsForSeqExtraction(
        export_exon_filename, 'gene-probesets',
        gene_db)  ### Make an SQLite call

    # Collect the third element of every (sorted) per-gene entry as the set
    # of row identifiers to keep; each key gets its own empty list.
    filter_db = {}
    for gene_id in ensembl_exon_db:
        ensembl_exon_db[gene_id].sort()
        for (_index, _ed, feature_id) in ensembl_exon_db[gene_id]:
            filter_db[feature_id] = []

    try:
        os.mkdir(root_dir)
    except Exception:
        pass  ### dir exists (best-effort; any failure is ignored as before)
    print('Image results being saved to the folder "ExonPlots" in the AltAnalyze results directory.')
    importTableEntries(exp_file,
                       filter_db,
                       ensembl_exon_db,
                       gene_db,
                       root_dir,
                       transpose,
                       display,
                       showIntrons,
                       analysisType=analysisType)  ### Make an SQLite call
Esempio n. 4
0
def displayExpressionGraph(species,Platform,exp_file,gene,transpose,display=True,showIntrons=False,analysisType='plot'):
    """Resolve the requested gene(s) and pass their exon-level data to
    ``importTableEntries``.

    Args:
        species: species code used to build the annotation file paths.
        Platform: platform name; also written to the module-level global
            ``platform``.
        exp_file: expression file path; the text before ``AltResults`` (or
            ``ExpressionInput``) is taken as the results root, under which
            the ``ExonPlots/`` folder is created.
        gene: gene identifiers separated by ``,``, ``|`` or spaces. A token
            containing ``ENS`` is looked up as a gene ID, anything else as a
            symbol. Identifiers that cannot be resolved are skipped silently.
        transpose, display, showIntrons, analysisType: passed through to
            ``importTableEntries`` unchanged.

    Raises:
        NameError: when no requested gene could be resolved (same exception
            type the original produced).
    """
    ### Get gene annotations (users can provide an Ensembl or symbol)
    print('Importing exon-level expression data for visualization (be patient)...')
    import ExonAnalyze_module
    global platform
    platform = Platform
    if platform != 'AltMouse': gene_annotation_file = "AltDatabase/ensembl/"+species+"/"+species+"_Ensembl-annotations.txt"
    else: gene_annotation_file = "AltDatabase/"+species+"/"+platform+"/"+platform+"_gene_annotations.txt"

    # Treat '|' and ' ' like ',' and ignore empty tokens left by doubled
    # separators, which could never match a gene anyway.
    queries = [token for token in gene.replace('|',',').replace(' ',',').split(',') if token]

    # Separate lazily-loaded tables for ID and symbol lookups: reusing one
    # table for both kinds made mixed requests fail for the second kind.
    id_keyed_db = None
    symbol_keyed_db = None
    gene_db = {}
    for query in queries:
        try:
            if 'ENS' in query:
                if id_keyed_db is None:
                    id_keyed_db = ExonAnalyze_module.import_annotations(gene_annotation_file,platform,keyBySymbol=False) ### Make an SQLite call
                gene_id = query
                gene_symbol = id_keyed_db[query].Symbol()
            else:
                if symbol_keyed_db is None:
                    symbol_keyed_db = ExonAnalyze_module.import_annotations(gene_annotation_file,platform,keyBySymbol=True)
                gene_symbol = query
                gene_id = symbol_keyed_db[query].GeneID()
            gene_db[gene_id] = gene_symbol
        except Exception:
            # Intentionally quiet: unresolved identifiers are simply dropped;
            # the check below reports the case where nothing was resolved.
            pass

    if len(gene_db)==0:
        # Explicit replacement for the original undefined-name trick; the
        # exception type is unchanged for existing callers.
        raise NameError('None of the requested genes were found in the annotation database')

    if 'AltResults' in exp_file:
        root_dir = exp_file.split('AltResults')[0]+'ExonPlots/'
    else:
        root_dir = exp_file.split('ExpressionInput')[0]+'ExonPlots/'

    if platform == 'RNASeq': datatype = 'exons'
    else: datatype = 'probesets'
    export_exon_filename = 'AltDatabase/'+species+'/'+platform+'/'+species+'_Ensembl_'+datatype+'.txt'
    if verifyFileLength(export_exon_filename) == 0:
        # Use the database copy stored under the results root instead.
        rootdir = root_dir.replace('ExonPlots/','')
        export_exon_filename = rootdir+'/'+export_exon_filename

    import ExonArrayEnsemblRules
    ensembl_exon_db = ExonArrayEnsemblRules.reimportEnsemblProbesetsForSeqExtraction(export_exon_filename,'gene-probesets',gene_db) ### Make an SQLite call

    # Sort each gene's entries in place and register the third element of
    # every entry as a key to keep, each with its own fresh list.
    filter_db = {}
    for gene_id in ensembl_exon_db:
        ensembl_exon_db[gene_id].sort()
        for (_index,_ed,feature_id) in ensembl_exon_db[gene_id]:
            filter_db[feature_id] = []

    try: os.mkdir(root_dir)
    except Exception: pass ### dir exists (best-effort; failures are ignored as before)
    print('Image results being saved to the folder "ExonPlots" in the AltAnalyze results directory.')
    importTableEntries(exp_file,filter_db,ensembl_exon_db,gene_db,root_dir,transpose,display,showIntrons,analysisType=analysisType) ### Make an SQLite call