def grabExonIntronPromoterSequences(species,array_type,data_type,output_types):
    """Export sequence files for every result file found in the sequence_input folder.

    species      -- used to build the AltDatabase paths
    array_type   -- platform folder name used in the input, output and database paths
    data_type    -- 'probeset' or 'gene'; selects which sequence_input directory is read
    output_types -- a single output type (adjacent intron sequences, adjacent exon
                    sequences, target exon sequence or promoter), or 'all', which is
                    expanded to every supported type only when data_type is 'probeset'

    For each input file and each requested output type one file named
    <input file minus its last 4 characters>-<data_type>-<output_type>.txt is
    written to the matching sequence_output directory through
    exportExonIntronPromoterSequences. Nothing is returned.

    Raises ValueError when data_type is neither 'probeset' nor 'gene'.
    """
    if data_type == 'probeset':
        sequence_input_dir = '/AltResults/AlternativeOutput/'+array_type+'/sequence_input'
    elif data_type == 'gene':
        sequence_input_dir = '/ExpressionOutput/'+array_type+'/sequence_input'
    else:
        raise ValueError('Unsupported data_type: '+str(data_type))

    ### Normalise the requested types ONCE, before the loop. Re-wrapping the argument
    ### for every input file would nest the list on the second file and break the
    ### string concatenation that builds the export filename.
    if output_types == 'all' and data_type == 'probeset':
        export_types = ['alt-promoter','promoter','exon','adjacent-exons','adjacent-introns']
    else:
        export_types = [output_types]

    ### None of these depend on the input file, so build them a single time
    sequence_output_dir = string.replace(sequence_input_dir,'_input','_output')
    export_exon_filename = 'AltDatabase/'+species+'/'+array_type+'/'+species+'_Ensembl_probesets.txt'
    gene_seq_filename = 'AltDatabase/ensembl/'+species+'/'+species+'_gene-seq-2000_flank'
    analysis_type = 'get_sequence'

    for input_file in read_directory(sequence_input_dir):
        ### The directory strings start with '/'; the leading slash is dropped for file access
        filedir = sequence_input_dir[1:]+'/'+input_file
        filter_db = inputResultFiles(filedir,data_type)
        ensembl_probeset_db = ExonArrayEnsemblRules.reimportEnsemblProbesetsForSeqExtraction(export_exon_filename,data_type,filter_db)
        ensembl_probeset_db = EnsemblImport.import_sequence_data(gene_seq_filename,ensembl_probeset_db,species,analysis_type)

        for output_type in export_types:
            filename = sequence_output_dir[1:]+'/'+input_file[:-4]+'-'+data_type+'-'+output_type+'.txt'
            exportExonIntronPromoterSequences(filename, ensembl_probeset_db,data_type,output_type)
def displayExpressionGraph(species, Platform, exp_file, gene, transpose, display=True, showIntrons=False, analysisType='plot'): ### Get gene annotations (users can provide an Ensembl or symbol) print 'Importing exon-level expression data for visualization (be patient)...' import ExonAnalyze_module global platform platform = Platform if platform != 'AltMouse': gene_annotation_file = "AltDatabase/ensembl/" + species + "/" + species + "_Ensembl-annotations.txt" else: gene_annotation_file = "AltDatabase/" + species + "/" + platform + "/" + platform + "_gene_annotations.txt" genes = [] gene = string.replace(gene, '|', ',') gene = string.replace(gene, ' ', ',') if ',' in gene: genes += string.split(gene, ',') else: genes.append(gene) gene_db = {} for gene in genes: try: if 'ENS' in gene: try: annotate_db ### If variable is defined except Exception: annotate_db = ExonAnalyze_module.import_annotations( gene_annotation_file, platform, keyBySymbol=False) ### Make an SQLite call gene_symbol = annotate_db[gene].Symbol() else: try: annotate_db ### If variable is defined except Exception: annotate_db = ExonAnalyze_module.import_annotations( gene_annotation_file, platform, keyBySymbol=True) gene_symbol = gene gene = annotate_db[gene].GeneID() gene_db[gene] = gene_symbol except Exception: print gene, 'not in database' if len(gene_db) == 0: force_no_gene_found_error if 'AltResults' in exp_file: root_dir = string.split(exp_file, 'AltResults')[0] + 'ExonPlots/' else: root_dir = string.split(exp_file, 'ExpressionInput')[0] + 'ExonPlots/' import ExonAnalyze_module if platform == 'RNASeq': datatype = 'exons' else: datatype = 'probesets' export_exon_filename = 'AltDatabase/' + species + '/' + platform + '/' + species + '_Ensembl_' + datatype + '.txt' if verifyFileLength(export_exon_filename) == 0: rootdir = string.replace(root_dir, 'ExonPlots/', '') export_exon_filename = rootdir + '/' + export_exon_filename import ExonArrayEnsemblRules ensembl_exon_db = 
ExonArrayEnsemblRules.reimportEnsemblProbesetsForSeqExtraction( export_exon_filename, 'gene-probesets', gene_db) ### Make an SQLite call filter_db = {} for gene in ensembl_exon_db: ensembl_exon_db[gene].sort() for (index, ed, id) in ensembl_exon_db[gene]: filter_db[id] = [] try: os.mkdir(root_dir) except Exception: None ### dir exists print 'Image results being saved to the folder "ExonPlots" in the AltAnalyze results directory.' importTableEntries(exp_file, filter_db, ensembl_exon_db, gene_db, root_dir, transpose, display, showIntrons, analysisType=analysisType) ### Make an SQLite call
def displayExpressionGraph(species,Platform,exp_file,gene,transpose,display=True,showIntrons=False,analysisType='plot'): ### Get gene annotations (users can provide an Ensembl or symbol) print 'Importing exon-level expression data for visualization (be patient)...' import ExonAnalyze_module global platform platform = Platform if platform != 'AltMouse': gene_annotation_file = "AltDatabase/ensembl/"+species+"/"+species+"_Ensembl-annotations.txt" else: gene_annotation_file = "AltDatabase/"+species+"/"+platform+"/"+platform+"_gene_annotations.txt" genes=[] gene=string.replace(gene,'|',',') gene=string.replace(gene,' ',',') if ',' in gene: genes += string.split(gene,',') else: genes.append(gene) gene_db={} for gene in genes: try: if 'ENS' in gene: try: annotate_db ### If variable is defined except Exception: annotate_db = ExonAnalyze_module.import_annotations(gene_annotation_file,platform,keyBySymbol=False) ### Make an SQLite call gene_symbol = annotate_db[gene].Symbol() else: try: annotate_db ### If variable is defined except Exception: annotate_db = ExonAnalyze_module.import_annotations(gene_annotation_file,platform,keyBySymbol=True) gene_symbol = gene gene = annotate_db[gene].GeneID() gene_db[gene]=gene_symbol except Exception: #if len(gene)>0: print gene, 'not in database' pass if len(gene_db)==0: force_no_gene_found_error if 'AltResults' in exp_file: root_dir = string.split(exp_file,'AltResults')[0]+'ExonPlots/' else: root_dir = string.split(exp_file,'ExpressionInput')[0]+'ExonPlots/' import ExonAnalyze_module if platform == 'RNASeq': datatype = 'exons' else: datatype = 'probesets' export_exon_filename = 'AltDatabase/'+species+'/'+platform+'/'+species+'_Ensembl_'+datatype+'.txt' if verifyFileLength(export_exon_filename) == 0: rootdir = string.replace(root_dir,'ExonPlots/','') export_exon_filename = rootdir+'/'+export_exon_filename import ExonArrayEnsemblRules ensembl_exon_db = 
ExonArrayEnsemblRules.reimportEnsemblProbesetsForSeqExtraction(export_exon_filename,'gene-probesets',gene_db) ### Make an SQLite call filter_db = {} for gene in ensembl_exon_db: ensembl_exon_db[gene].sort() for (index,ed,id) in ensembl_exon_db[gene]: filter_db[id] = [] try: os.mkdir(root_dir) except Exception: None ### dir exists print 'Image results being saved to the folder "ExonPlots" in the AltAnalyze results directory.' importTableEntries(exp_file,filter_db,ensembl_exon_db,gene_db,root_dir,transpose,display,showIntrons,analysisType=analysisType) ### Make an SQLite call
def getJunctionComparisonsFromExport(species,array_type): type = 'standard' gene_junction_db = importEnsemblUCSCAltJunctions(species,type) ### Retrieve probesets with exon-junctions associated - these are critical exons filename = 'AltDatabase/'+species+'/'+array_type+'/'+species+'_Ensembl_'+array_type+'_probesets.txt' gene_probeset_db = ExonArrayEnsemblRules.reimportEnsemblProbesetsForSeqExtraction(filename,'junctions',{}) left={}; right={}; gene_db={}; gene_exon_db={}; nonjunction_aligning={} for gene in gene_probeset_db: for (probe_data,ed) in gene_probeset_db[gene]: probeset, strand, probeset_start, probeset_stop = probe_data region_id = string.replace(ed.RegionNumber(),'-','.') original_region_id = region_id region_ids = string.split(region_id,'|') gene_db[probeset[:-2]]=gene #ed.AssociatedSplicingJunctions() r_starts=string.split(ed.ExonStart(),'|'); r_stops=string.split(ed.ExonStop(),'|') for region_id in region_ids: if '|5' in probeset: try: left[probeset[:-2]].append(region_id) except Exception: left[probeset[:-2]]=[region_id] if strand == '+': ### If the junction probesets DO NOT align to the region coordinates, then the probeset maps to a junction outside the database if probeset_stop not in r_stops: nonjunction_aligning[probeset[:-2]] = original_region_id+'_'+probeset_stop,'left' elif probeset_start not in r_starts: nonjunction_aligning[probeset[:-2]] = original_region_id+'_'+probeset_start,'left' elif '|3' in probeset: try: right[probeset[:-2]].append(region_id) except Exception: right[probeset[:-2]]=[region_id] if strand == '+': if probeset_start not in r_starts: nonjunction_aligning[probeset[:-2]] = original_region_id+'_'+probeset_start,'right' elif probeset_stop not in r_stops: nonjunction_aligning[probeset[:-2]] = original_region_id+'_'+probeset_stop,'right' else: if '_' in region_id: print killer try: gene_exon_db[gene,region_id].append(probeset) except Exception: gene_exon_db[gene,region_id] = [probeset] print 
'len(nonjunction_aligning)',len(nonjunction_aligning) gene_exon_db = eliminateRedundant(gene_exon_db) junction_db={} ### Get the exon-region IDs for an exon-junction for probeset in left: gene = gene_db[probeset] if probeset in right: for region1 in left[probeset]: for region2 in right[probeset]: junction = region1+'-'+region2 try: junction_db[gene,junction].append(probeset) except Exception: junction_db[gene,junction] = [probeset] probeset_junction_export = 'AltDatabase/' + species + '/'+array_type+'/'+ species + '_junction_comps.txt' fn=filepath(probeset_junction_export); data = open(fn,'w') print "Exporting",probeset_junction_export title = 'gene'+'\t'+'critical_exon'+'\t'+'exclusion_junction_region'+'\t'+'inclusion_junction_region'+'\t'+'exclusion_probeset'+'\t'+'inclusion_probeset'+'\t'+'data_source'+'\n' data.write(title); temp_list=[] for (gene,critical_exon,incl_junction,excl_junction) in gene_junction_db: if (gene,incl_junction) in junction_db: incl_junction_probesets = junction_db[gene,incl_junction] if (gene,excl_junction) in junction_db: excl_junction_probesets = junction_db[gene,excl_junction] for incl_junction_probeset in incl_junction_probesets: for excl_junction_probeset in excl_junction_probesets: try: for incl_exon_probeset in gene_exon_db[gene,critical_exon]: if incl_junction_probeset in nonjunction_aligning or excl_junction_probeset in nonjunction_aligning: null=[] else: ### Ensure the probeset DOES map to the annotated junctions temp_list.append(string.join([gene,critical_exon,excl_junction,critical_exon,excl_junction_probeset,incl_exon_probeset,'AltAnalyze'],'\t')+'\n') except Exception: null=[] if incl_junction_probeset in nonjunction_aligning: new_region_id, side = nonjunction_aligning[incl_junction_probeset] incl_junction = renameJunction(incl_junction,side,new_region_id) if excl_junction_probeset in nonjunction_aligning: new_region_id, side = nonjunction_aligning[excl_junction_probeset] excl_junction = 
renameJunction(excl_junction,side,new_region_id) if excl_junction_probeset!=incl_junction_probeset: temp_list.append(string.join([gene,critical_exon,excl_junction,incl_junction,excl_junction_probeset,incl_junction_probeset,'AltAnalyze'],'\t')+'\n') temp_list = unique.unique(temp_list) for i in temp_list: data.write(i) data.close() print 'Number of compared junctions exported', len(temp_list)