def exportChromosomeStrandCoordinates(species):
    """Export the chromosome, strand and coordinates of each gene of a species.

    Writes one tab-delimited row per gene (gene ID, symbol, chromosome,
    strand, start, end, biotype) to
    ``GenMAPPDBs/<species>/chr_gene_locations.txt``.  Rows are ordered by
    chromosome, strand, start, end, gene ID and biotype.  Genes whose
    chromosome name is seven or more characters long are left out.
    Summary counts and the export path are printed to standard output.

    species -- species code used to build the annotation and export paths.
    """
    import EnsemblImport
    gene_location_db = EnsemblImport.getEnsemblGeneLocations(
        species, 'RNASeq', 'key_by_array')
    import ExpressionBuilder
    gene_biotype_db = ExpressionBuilder.importTranscriptBiotypeAnnotations(
        species)

    export_path = 'GenMAPPDBs/' + species + '/chr_gene_locations.txt'
    export_data = export.ExportFile(export_path)
    try:
        import ExonAnalyze_module
        gene_annotation_file = ("AltDatabase/ensembl/" + species + "/" +
                                species + "_Ensembl-annotations.txt")
        annotate_db = ExonAnalyze_module.import_annotations(
            gene_annotation_file, 'RNASeq')
        print('Annotations for %d genes imported' % len(gene_location_db))

        sorted_list = []
        protein_coding = 0
        for gene in gene_location_db:
            chrom, strand, start, end = gene_location_db[gene]
            if gene in gene_biotype_db:
                # The last element of the biotype entry is used as the biotype.
                biotype = gene_biotype_db[gene][-1]
                if biotype == 'protein_coding':
                    protein_coding += 1
            else:
                biotype = 'NA'
            # Only short chromosome names are kept (reported below as
            # "typical chromosomes").
            if len(chrom) < 7:
                sorted_list.append(
                    [chrom, strand, int(start), int(end), gene, biotype])

        print('%d genes for typical chromosomes present' % len(sorted_list))
        print('%d protein coding genes present' % protein_coding)

        # Integer coordinates make the sort numeric rather than lexical.
        sorted_list.sort()
        for chrom, strand, start, end, gene, biotype in sorted_list:
            try:
                symbol = annotate_db[gene].Symbol()
            except Exception:
                # Best effort: genes without a usable annotation entry are
                # exported with an empty symbol.
                symbol = ''
            row = [gene, symbol, chrom, strand, str(start), str(end), biotype]
            export_data.write('\t'.join(row) + '\n')
    finally:
        # Release the export handle even when import or writing fails.
        export_data.close()
    print('%s chromosome locations exported to:\n%s' % (species, export_path))
def exportChromosomeStrandCoordinates(species):
    """Write a sorted table of gene locations for the given species.

    The output file ``GenMAPPDBs/<species>/chr_gene_locations.txt`` gets one
    tab-delimited line per gene with the columns gene ID, symbol,
    chromosome, strand, start, end and biotype.  Lines are sorted by
    chromosome, strand, start, end, gene ID and biotype.  Genes located on a
    chromosome whose name has seven or more characters are skipped.
    Progress and summary messages are printed to standard output.

    species -- species code used to build the annotation and export paths.
    """
    import EnsemblImport
    gene_location_db = EnsemblImport.getEnsemblGeneLocations(
        species, 'RNASeq', 'key_by_array')
    import ExpressionBuilder
    gene_biotype_db = ExpressionBuilder.importTranscriptBiotypeAnnotations(
        species)

    export_path = 'GenMAPPDBs/' + species + '/chr_gene_locations.txt'
    export_data = export.ExportFile(export_path)
    try:
        import ExonAnalyze_module
        gene_annotation_file = ("AltDatabase/ensembl/" + species + "/" +
                                species + "_Ensembl-annotations.txt")
        annotate_db = ExonAnalyze_module.import_annotations(
            gene_annotation_file, 'RNASeq')
        print('Annotations for %d genes imported' % len(gene_location_db))

        sorted_list = []
        protein_coding = 0
        for gene in gene_location_db:
            chrom, strand, start, end = gene_location_db[gene]
            biotype = 'NA'
            if gene in gene_biotype_db:
                # The biotype is read from the last element of the entry.
                biotype = gene_biotype_db[gene][-1]
                if biotype == 'protein_coding':
                    protein_coding += 1
            # Long chromosome names are excluded from the export.
            if len(chrom) < 7:
                sorted_list.append(
                    [chrom, strand, int(start), int(end), gene, biotype])

        print('%d genes for typical chromosomes present' % len(sorted_list))
        print('%d protein coding genes present' % protein_coding)

        # Coordinates were converted to int above so they sort numerically.
        sorted_list.sort()
        for chrom, strand, start, end, gene, biotype in sorted_list:
            try:
                symbol = annotate_db[gene].Symbol()
            except Exception:
                # Best effort: fall back to an empty symbol when the gene has
                # no usable annotation entry.
                symbol = ''
            fields = [gene, symbol, chrom, strand,
                      str(start), str(end), biotype]
            export_data.write('\t'.join(fields) + '\n')
    finally:
        # Always close the export handle, including on errors.
        export_data.close()
    print('%s chromosome locations exported to:\n%s' % (species, export_path))
def displayExpressionGraph(species, Platform, exp_file, gene, transpose,
                           display=True, showIntrons=False,
                           analysisType='plot'):
    """Resolve the requested genes and hand their exon data to the plotter.

    species      -- species code used to build the annotation paths.
    Platform     -- platform name; also stored in the module global
                    ``platform``.
    exp_file     -- path of the expression file; the text before
                    'AltResults' (or else 'ExpressionInput') locates the
                    'ExonPlots/' output folder.
    gene         -- one or more Ensembl gene IDs and/or gene symbols,
                    separated by ',', '|' or ' '.
    transpose, display, showIntrons, analysisType -- passed unchanged to
                    importTableEntries.

    Genes that cannot be resolved are reported on standard output and
    skipped.  Raises NameError when none of the requested genes could be
    resolved.
    """
    ### Get gene annotations (users can provide an Ensembl or symbol)
    print('Importing exon-level expression data for visualization (be patient)...')
    import ExonAnalyze_module
    global platform
    platform = Platform
    if platform != 'AltMouse':
        gene_annotation_file = ("AltDatabase/ensembl/" + species + "/" +
                                species + "_Ensembl-annotations.txt")
    else:
        gene_annotation_file = ("AltDatabase/" + species + "/" + platform +
                                "/" + platform + "_gene_annotations.txt")

    # '|' and ' ' are accepted as alternatives to ',' between genes.
    genes = gene.replace('|', ',').replace(' ', ',').split(',')

    # One annotation table per key mode (True = keyed by symbol, False = keyed
    # by gene ID).  A single shared table made every gene of the other kind
    # fail its lookup when IDs and symbols were mixed in one request.
    annotation_cache = {}
    gene_db = {}
    for query in genes:
        try:
            key_by_symbol = 'ENS' not in query
            if key_by_symbol not in annotation_cache:
                ### Make an SQLite call (original author's note)
                annotation_cache[key_by_symbol] = (
                    ExonAnalyze_module.import_annotations(
                        gene_annotation_file, platform,
                        keyBySymbol=key_by_symbol))
            annotate_db = annotation_cache[key_by_symbol]
            if key_by_symbol:
                gene_symbol = query
                gene_id = annotate_db[query].GeneID()
            else:
                gene_id = query
                gene_symbol = annotate_db[query].Symbol()
            gene_db[gene_id] = gene_symbol
        except Exception:
            print('%s not in database' % query)

    if not gene_db:
        # The original forced a NameError here by evaluating an undefined
        # name; the same exception type is now raised explicitly.
        raise NameError('force_no_gene_found_error: none of the requested '
                        'genes could be resolved')

    if 'AltResults' in exp_file:
        root_dir = exp_file.split('AltResults')[0] + 'ExonPlots/'
    else:
        root_dir = exp_file.split('ExpressionInput')[0] + 'ExonPlots/'

    if platform == 'RNASeq':
        datatype = 'exons'
    else:
        datatype = 'probesets'
    export_exon_filename = ('AltDatabase/' + species + '/' + platform + '/' +
                            species + '_Ensembl_' + datatype + '.txt')
    # verifyFileLength is defined elsewhere; a result of 0 presumably means
    # the file is missing or empty -- in that case look for it under the
    # directory that contains ExonPlots/.
    if verifyFileLength(export_exon_filename) == 0:
        rootdir = root_dir.replace('ExonPlots/', '')
        export_exon_filename = rootdir + '/' + export_exon_filename

    import ExonArrayEnsemblRules
    ### Make an SQLite call (original author's note)
    ensembl_exon_db = ExonArrayEnsemblRules.reimportEnsemblProbesetsForSeqExtraction(
        export_exon_filename, 'gene-probesets', gene_db)

    # Collect every feature ID of the selected genes as the import filter.
    filter_db = {}
    for gene_id in ensembl_exon_db:
        ensembl_exon_db[gene_id].sort()
        for (index, ed, feature_id) in ensembl_exon_db[gene_id]:
            filter_db[feature_id] = []

    try:
        os.mkdir(root_dir)
    except Exception:
        pass  ### dir exists
    print('Image results being saved to the folder "ExonPlots" in the AltAnalyze results directory.')
    ### Make an SQLite call (original author's note)
    importTableEntries(exp_file, filter_db, ensembl_exon_db, gene_db,
                       root_dir, transpose, display, showIntrons,
                       analysisType=analysisType)
def displayExpressionGraph(species, Platform, exp_file, gene, transpose,
                           display=True, showIntrons=False,
                           analysisType='plot'):
    """Look up the requested genes and pass their exon data on for plotting.

    species      -- species code used to build the annotation paths.
    Platform     -- platform name; also stored in the module global
                    ``platform``.
    exp_file     -- path of the expression file; the text before
                    'AltResults' (or else 'ExpressionInput') locates the
                    'ExonPlots/' output folder.
    gene         -- one or more Ensembl gene IDs and/or gene symbols,
                    separated by ',', '|' or ' '.
    transpose, display, showIntrons, analysisType -- passed unchanged to
                    importTableEntries.

    Genes that cannot be resolved are skipped without a message.  Raises
    NameError when none of the requested genes could be resolved.
    """
    ### Get gene annotations (users can provide an Ensembl or symbol)
    print('Importing exon-level expression data for visualization (be patient)...')
    import ExonAnalyze_module
    global platform
    platform = Platform
    if platform != 'AltMouse':
        gene_annotation_file = ("AltDatabase/ensembl/" + species + "/" +
                                species + "_Ensembl-annotations.txt")
    else:
        gene_annotation_file = ("AltDatabase/" + species + "/" + platform +
                                "/" + platform + "_gene_annotations.txt")

    # '|' and ' ' are accepted as alternatives to ',' between genes.
    genes = gene.replace('|', ',').replace(' ', ',').split(',')

    # Annotation tables are cached per key mode (True = keyed by symbol,
    # False = keyed by gene ID).  Reusing one table for both kinds of query
    # made mixed ID/symbol requests drop every gene of the second kind.
    tables_by_mode = {}
    gene_db = {}
    for query in genes:
        try:
            use_symbol_key = 'ENS' not in query
            if use_symbol_key not in tables_by_mode:
                ### Make an SQLite call (original author's note)
                tables_by_mode[use_symbol_key] = (
                    ExonAnalyze_module.import_annotations(
                        gene_annotation_file, platform,
                        keyBySymbol=use_symbol_key))
            annotate_db = tables_by_mode[use_symbol_key]
            if use_symbol_key:
                gene_symbol = query
                gene_id = annotate_db[query].GeneID()
            else:
                gene_id = query
                gene_symbol = annotate_db[query].Symbol()
            gene_db[gene_id] = gene_symbol
        except Exception:
            # Deliberately silent: unresolved entries are skipped, and the
            # empty-result check below reports a total failure.
            pass

    if not gene_db:
        # The original forced a NameError here by evaluating an undefined
        # name; the same exception type is now raised explicitly.
        raise NameError('force_no_gene_found_error: none of the requested '
                        'genes could be resolved')

    if 'AltResults' in exp_file:
        root_dir = exp_file.split('AltResults')[0] + 'ExonPlots/'
    else:
        root_dir = exp_file.split('ExpressionInput')[0] + 'ExonPlots/'

    if platform == 'RNASeq':
        datatype = 'exons'
    else:
        datatype = 'probesets'
    export_exon_filename = ('AltDatabase/' + species + '/' + platform + '/' +
                            species + '_Ensembl_' + datatype + '.txt')
    # verifyFileLength is defined elsewhere; a result of 0 presumably means
    # the file is missing or empty -- in that case look for it under the
    # directory that contains ExonPlots/.
    if verifyFileLength(export_exon_filename) == 0:
        rootdir = root_dir.replace('ExonPlots/', '')
        export_exon_filename = rootdir + '/' + export_exon_filename

    import ExonArrayEnsemblRules
    ### Make an SQLite call (original author's note)
    ensembl_exon_db = ExonArrayEnsemblRules.reimportEnsemblProbesetsForSeqExtraction(
        export_exon_filename, 'gene-probesets', gene_db)

    # Collect every feature ID of the selected genes as the import filter.
    filter_db = {}
    for gene_id in ensembl_exon_db:
        ensembl_exon_db[gene_id].sort()
        for (index, ed, feature_id) in ensembl_exon_db[gene_id]:
            filter_db[feature_id] = []

    try:
        os.mkdir(root_dir)
    except Exception:
        pass  ### dir exists
    print('Image results being saved to the folder "ExonPlots" in the AltAnalyze results directory.')
    ### Make an SQLite call (original author's note)
    importTableEntries(exp_file, filter_db, ensembl_exon_db, gene_db,
                       root_dir, transpose, display, showIntrons,
                       analysisType=analysisType)