def sashmi_plot_list(bamdir,eventsToVisualizeFilename,PSIFilename,events=None):
    """Submit the requested splicing events for SashimiPlot export.

    Events are taken from `events` when supplied, otherwise they are read
    from `eventsToVisualizeFilename`. Supplied events whose gene prefix (the
    text before '__') is a known symbol are replaced in place by the matching
    Ensembl gene ID. Plotting is attempted three times: against the PSI file,
    then (for events not yet processed) against the steady-state expression
    file, then against the PSI file again with the expanded search enabled.

    Relies on the module-level names `species`, `root_dir` and
    `steady_state_exp_file`.

    Returns:
        The gene -> symbol mapping from gene_associations.getGeneToUid.

    Raises:
        NameError: when there are no events to plot (same exception type the
            previous undefined-name abort produced).
    """
    import sys
    import gene_associations
    gene_to_symbol = gene_associations.getGeneToUid(species,('hide','Ensembl-Symbol'))
    import OBO_import
    symbol_to_gene = OBO_import.swapKeyValues(gene_to_symbol)

    if events is None:
        splicing_events,expandedSearch = importSplicingEventsToVisualize(eventsToVisualizeFilename)
    else:
        expandedSearch = True
        for i, event in enumerate(events):
            gene = event.split('__')[0]
            if gene in gene_to_symbol:
                continue ### already a recognised gene ID, nothing to translate
            if ('ENS' not in gene or 'G0000' in gene) and gene in symbol_to_gene:
                ### translate this ID to an Ensembl gene ID for proper SashimiPlot lookup
                events[i] = symbol_to_gene[gene][0]
        splicing_events = events ### optionally get from supplied variable

    if len(splicing_events)==0:
        print(eventsToVisualizeFilename)
        ### Explicit form of the former undefined-name abort (still a NameError)
        raise NameError('forceNoCompatibleEventsInFile: no compatible events found in '+str(eventsToVisualizeFilename))

    ### Progress message without a trailing newline (valid in Python 2 and 3)
    sys.stdout.write('Exporting plots ')

    ### Determine Groups for Coloring
    groups_file = None
    expression_dir = root_dir+'/ExpressionInput'
    for entry in unique.read_directory(expression_dir):
        if 'groups.' in entry:
            groups_file = expression_dir+'/'+entry

    ### Defaults used when no groups file is found or it cannot be parsed.
    ### NOTE(review): falling back to the module-level sample_group_db (set by
    ### remoteSashimiPlot) is an assumption about the intended behaviour; before
    ### this change the name was simply unbound here and the function crashed.
    groups = ['None']
    sample_group_db = globals().get('sample_group_db', {})
    if groups_file is not None:
        try:
            import ExpressionBuilder
            sample_group_db = ExpressionBuilder.simplerGroupImport(groups_file)
            groups = []
            for sample in sample_group_db:
                if sample_group_db[sample] not in groups:
                    groups.append(sample_group_db[sample]) ### create an ordered list of unique group
        except Exception:
            ### Best effort: plot without group colouring
            groups = ['None']

    processed_events = formatAndSubmitSplicingEventsToSashimiPlot(PSIFilename, bamdir, splicing_events, sample_group_db, groups, False)
    mopup_events = getMopUpEvents(splicing_events, processed_events)

    ### Do the same for supplied gene queries or junctions that didn't map above using the gene expression values as a guide
    processed_events = formatAndSubmitSplicingEventsToSashimiPlot(steady_state_exp_file,bamdir,mopup_events,sample_group_db,groups,expandedSearch)
    if len(processed_events)>0:
        mopup_events = getMopUpEvents(mopup_events, processed_events)
        formatAndSubmitSplicingEventsToSashimiPlot(PSIFilename, bamdir, mopup_events, sample_group_db, groups, True)
    return gene_to_symbol
def matrixImport(filename):
    """Import a tab-delimited value matrix and bucket each row's values by group.

    The first line is the header. When it contains ':' every sample label
    must look like 'group:sample'; otherwise the sample -> group mapping is
    loaded from a 'groups.' file in the ExpressionInput directory derived
    from `filename` (the part before 'AltResults').

    Returns:
        (matrix, original_data) where
        matrix[row_key] is a list with one entry per group (first-seen order),
        each a list of the row's float values for that group; cells equal to
        '0' are stored as '' and non-numeric cells are skipped.
        original_data maps 'header' and every row key to the raw input line
        (row lines are guaranteed to end with a newline).
    """
    matrix={}
    original_data={}
    group_db={} ### group name -> column indexes into a full data row
    groups=[] ### group names in first-seen order
    try:
        handle = open(filename,'rU')
    except ValueError:
        ### Python >= 3.11 rejects the 'U' flag; text mode already handles newlines there
        handle = open(filename,'r')
    with handle: ### close the file even if parsing fails
        headerRow=True
        for line in handle:
            original_line = line
            data = line.rstrip()
            values = data.split('\t')
            if headerRow:
                headerRow = False
                original_data['header'] = original_line
                samples = values[1:]
                if ':' in data:
                    group_names=[]
                    for label in samples:
                        g,_sample = label.split(':') ### exactly one ':' per label is required
                        group_names.append(g)
                else:
                    import ExpressionBuilder
                    search_dir = filename.split('AltResults')[0]+'ExpressionInput'
                    ### sample_group_db is only bound when a 'groups.' file is present
                    for entry in unique.read_directory(search_dir):
                        if 'groups.' in entry:
                            sample_group_db = ExpressionBuilder.simplerGroupImport(search_dir+'/'+entry)
                    group_names = [sample_group_db[s] for s in samples]
                ### Column 0 of every row is the row key, so sample columns start at 1.
                ### (The groups-file branch used to start at 0 and read the wrong column.)
                for column, g in enumerate(group_names, 1):
                    group_db.setdefault(g, []).append(column)
                    if g not in groups: groups.append(g)
            else:
                key = values[0]
                grouped_floats=[]
                for g in groups:
                    gvalues_list=[]
                    for i in group_db[g]:
                        if values[i] != '0':
                            try: gvalues_list.append(float(values[i]))
                            except ValueError: pass ### non-numeric cell, skip it
                        else:
                            gvalues_list.append('') ### These are missing values
                    grouped_floats.append(gvalues_list)
                matrix[key] = grouped_floats
                if '\n' not in original_line: original_line+='\n'
                original_data[key] = original_line
    return matrix,original_data
def remoteSashimiPlot(Species, fl, bamdir, eventsToVisualizeFilename, events=None, show=False):
    """Configure the module globals, run the Sashimi plotting and rename the outputs.

    `fl` is either an object exposing CountsFile() and RootDir(), or the
    project root directory itself (in which case the counts file is looked up
    under <root>/ExpressionInput). The function sets the module-level
    PSIFilename, outputdir, root_dir, steady_state_exp_file, species and
    sample_group_db, writes 'TRUE'/'FALSE' to <outputdir>/show.txt according
    to `show`, calls Sashimiplottting, and finally renames every .pdf/.png in
    the output directory whose leading '__'-separated token is a key of the
    returned mapping to '<symbol>-SashimiPlot_<rest>'.

    Raises:
        NameError: if no 'exp.' (non steady-state) file, or no counts file in
            the directory-path case, is found, because the corresponding
            local is then never assigned.
    """
    import errno
    global PSIFilename
    global outputdir
    global root_dir
    global steady_state_exp_file
    global species
    global sample_group_db

    species = Species
    try:
        countinp = fl.CountsFile()
        root_dir = fl.RootDir()
    except Exception:
        ### fl does not provide the accessor methods; use it as the root directory
        root_dir = fl
        search_dir = root_dir + '/ExpressionInput'
        for entry in unique.read_directory(search_dir):
            if 'counts.' in entry and 'steady-state.txt' not in entry:
                countinp = search_dir + '/' + entry

    PSIFilename = root_dir + '/AltResults/AlternativeOutput/' + species + '_RNASeq_top_alt_junctions-PSI.txt'

    import ExpressionBuilder
    expression_dir = root_dir + '/ExpressionInput'
    for entry in unique.read_directory(expression_dir):
        if 'exp.' not in entry:
            continue
        if 'steady-state' in entry:
            steady_state_exp_file = expression_dir + '/' + entry
        else:
            exp_file = expression_dir + '/' + entry
    sample_group_db = ExpressionBuilder.simplerGroupImport(exp_file)

    outputdir = root_dir + '/SashimiPlots'
    try:
        os.mkdir(unique.filepath(outputdir))
    except Exception:
        pass ### best effort, as before (e.g. the directory is already there)

    ### Record whether the plots should be displayed; the file is closed even on error
    with open(outputdir + '/show.txt', 'w') as show_flag:
        show_flag.write('TRUE' if show else 'FALSE')

    geneSymbol_db = Sashimiplottting(bamdir, countinp, PSIFilename, eventsToVisualizeFilename, events=events)

    for filename in os.listdir(outputdir):
        if '.pdf' not in filename and '.png' not in filename:
            continue
        gene = filename.replace('.pdf', '').replace('.png', '').split('__')[0]
        if gene not in geneSymbol_db:
            continue
        new_filename = str(filename)
        for separator in ('__', '\\', '/'):
            if separator in filename:
                new_filename = filename.split(separator)[1]
                break
        nnname = geneSymbol_db[gene][0] + '-SashimiPlot_' + new_filename
        source_path = os.path.join(outputdir, filename)
        target_path = os.path.join(outputdir, nnname)
        try:
            os.rename(source_path, target_path)
        except Exception as rename_error:
            ### Detect "destination exists" by errno rather than by message text
            ### alone, so the check does not depend on the OS message wording.
            collision = (getattr(rename_error, 'errno', None) == errno.EEXIST
                         or 'already exists' in str(rename_error))
            if collision:
                ### File already exists, delete the old one
                try: os.remove(target_path)
                except Exception: pass
                ### Now write the new one
                try: os.rename(source_path, target_path)
                except Exception: pass
    print('')
def sashmi_plot_list(bamdir, eventsToVisualizeFilename, PSIFilename, events=None):
    """Submit the requested splicing events for SashimiPlot export.

    Events are taken from `events` when supplied, otherwise they are read
    from `eventsToVisualizeFilename`. Supplied events whose gene prefix (the
    text before '__') is a known symbol are replaced in place by the matching
    Ensembl gene ID. Plotting is attempted three times: against the PSI file,
    then (for events not yet processed) against the steady-state expression
    file, then against the PSI file again with the expanded search enabled.

    Relies on the module-level names `species`, `root_dir` and
    `steady_state_exp_file`.

    Returns:
        The gene -> symbol mapping from gene_associations.getGeneToUid, or an
        empty dict when the annotation import failed.

    Raises:
        NameError: when there are no events to plot (same exception type the
            previous undefined-name abort produced).
    """
    import sys

    ### Both lookups default to empty so a failed annotation import no longer
    ### leaves gene_to_symbol unbound for the code (and the return) below.
    gene_to_symbol = {}
    symbol_to_gene = {}
    try:
        import gene_associations
        gene_to_symbol = gene_associations.getGeneToUid(species, ('hide', 'Ensembl-Symbol'))
        from import_scripts import OBO_import
        symbol_to_gene = OBO_import.swapKeyValues(gene_to_symbol)
    except Exception:
        pass ### best effort: continue without symbol translation

    if events is None:
        splicing_events, expandedSearch = importSplicingEventsToVisualize(eventsToVisualizeFilename)
    else:
        expandedSearch = True
        for i, event in enumerate(events):
            gene = event.split('__')[0]
            if gene in gene_to_symbol:
                continue ### already a recognised gene ID, nothing to translate
            if ('ENS' not in gene or 'G0000' in gene) and gene in symbol_to_gene:
                ### translate this ID to an Ensembl gene ID for proper SashimiPlot lookup
                events[i] = symbol_to_gene[gene][0]
        splicing_events = events ### optionally get from supplied variable

    if len(splicing_events) == 0:
        print(eventsToVisualizeFilename)
        ### Explicit form of the former undefined-name abort (still a NameError)
        raise NameError('forceNoCompatibleEventsInFile: no compatible events found in ' + str(eventsToVisualizeFilename))

    ### Progress message without a trailing newline (valid in Python 2 and 3)
    sys.stdout.write('Exporting plots ')

    ### Determine Groups for Coloring
    groups_file = None
    expression_dir = root_dir + '/ExpressionInput'
    for entry in unique.read_directory(expression_dir):
        if 'groups.' in entry:
            groups_file = expression_dir + '/' + entry

    ### Defaults used when no groups file is found or it cannot be parsed.
    ### NOTE(review): falling back to the module-level sample_group_db (set by
    ### remoteSashimiPlot) is an assumption about the intended behaviour; before
    ### this change the name was simply unbound here and the function crashed.
    groups = ['None']
    sample_group_db = globals().get('sample_group_db', {})
    if groups_file is not None:
        try:
            import ExpressionBuilder
            sample_group_db = ExpressionBuilder.simplerGroupImport(groups_file)
            groups = []
            for sample in sample_group_db:
                if sample_group_db[sample] not in groups:
                    groups.append(sample_group_db[sample]) ### create an ordered list of unique group
        except Exception:
            ### Best effort: plot without group colouring
            groups = ['None']

    processed_events = formatAndSubmitSplicingEventsToSashimiPlot(
        PSIFilename, bamdir, splicing_events, sample_group_db, groups, False)
    mopup_events = getMopUpEvents(splicing_events, processed_events)

    ### Do the same for supplied gene queries or junctions that didn't map above using the gene expression values as a guide
    processed_events = formatAndSubmitSplicingEventsToSashimiPlot(
        steady_state_exp_file, bamdir, mopup_events, sample_group_db, groups, expandedSearch)
    if len(processed_events) > 0:
        mopup_events = getMopUpEvents(mopup_events, processed_events)
        formatAndSubmitSplicingEventsToSashimiPlot(
            PSIFilename, bamdir, mopup_events, sample_group_db, groups, True)
    return gene_to_symbol
def remoteSashimiPlot(Species, fl, bamdir, eventsToVisualizeFilename, events=None, show=False):
    """Configure the module globals, run the Sashimi plotting and rename the outputs.

    `fl` is either an object exposing CountsFile() and RootDir(), or the
    project root directory itself (in which case the counts file is looked up
    under <root>/ExpressionInput). The function sets the module-level
    PSIFilename, outputdir, root_dir, steady_state_exp_file, species and
    sample_group_db, writes "TRUE"/"FALSE" to <outputdir>/show.txt according
    to `show`, calls Sashimiplottting, and finally renames every .pdf/.png in
    the output directory whose leading "__"-separated token is a key of the
    returned mapping to "<symbol>-SashimiPlot_<rest>".

    Raises:
        NameError: if no "exp." (non steady-state) file, or no counts file in
            the directory-path case, is found, because the corresponding
            local is then never assigned.
    """
    import errno
    global PSIFilename
    global outputdir
    global root_dir
    global steady_state_exp_file
    global species
    global sample_group_db

    species = Species
    try:
        countinp = fl.CountsFile()
        root_dir = fl.RootDir()
    except Exception:
        # fl does not provide the accessor methods; use it as the root directory
        root_dir = fl
        search_dir = root_dir + "/ExpressionInput"
        for entry in unique.read_directory(search_dir):
            if "counts." in entry and "steady-state.txt" not in entry:
                countinp = search_dir + "/" + entry

    PSIFilename = root_dir + "/AltResults/AlternativeOutput/" + species + "_RNASeq_top_alt_junctions-PSI.txt"

    import ExpressionBuilder
    expression_dir = root_dir + "/ExpressionInput"
    for entry in unique.read_directory(expression_dir):
        if "exp." not in entry:
            continue
        if "steady-state" in entry:
            steady_state_exp_file = expression_dir + "/" + entry
        else:
            exp_file = expression_dir + "/" + entry
    sample_group_db = ExpressionBuilder.simplerGroupImport(exp_file)

    outputdir = root_dir + "/SashimiPlots"
    try:
        os.mkdir(unique.filepath(outputdir))
    except Exception:
        pass  # best effort, as before (e.g. the directory is already there)

    # Record whether the plots should be displayed; the file is closed even on error
    with open(outputdir + "/show.txt", "w") as show_flag:
        show_flag.write("TRUE" if show else "FALSE")

    geneSymbol_db = Sashimiplottting(bamdir, countinp, PSIFilename, eventsToVisualizeFilename, events=events)

    for filename in os.listdir(outputdir):
        if ".pdf" not in filename and ".png" not in filename:
            continue
        gene = filename.replace(".pdf", "").replace(".png", "").split("__")[0]
        if gene not in geneSymbol_db:
            continue
        new_filename = str(filename)
        for separator in ("__", "\\", "/"):
            if separator in filename:
                new_filename = filename.split(separator)[1]
                break
        nnname = geneSymbol_db[gene][0] + "-SashimiPlot_" + new_filename
        source_path = os.path.join(outputdir, filename)
        target_path = os.path.join(outputdir, nnname)
        try:
            os.rename(source_path, target_path)
        except Exception as rename_error:
            # Detect "destination exists" by errno rather than by message text
            # alone, so the check does not depend on the OS message wording.
            collision = (getattr(rename_error, "errno", None) == errno.EEXIST
                         or "already exists" in str(rename_error))
            if collision:
                # File already exists, delete the old one
                try:
                    os.remove(target_path)
                except Exception:
                    pass
                # Now write the new one
                try:
                    os.rename(source_path, target_path)
                except Exception:
                    pass
    print("")
def remoteSashimiPlot(Species,fl,bamdir,eventsToVisualizeFilename,events=None,show=False):
    """Configure the module globals, run the Sashimi plotting and rename the outputs.

    `fl` is either an object exposing CountsFile() and RootDir(), or the
    project root directory itself (in which case the counts file is looked up
    under <root>/ExpressionInput). The function sets the module-level
    PSIFilename, outputdir, root_dir, steady_state_exp_file, species and
    sample_group_db, writes 'TRUE'/'FALSE' to <outputdir>/show.txt according
    to `show`, calls Sashimiplottting, and finally renames every .pdf/.png in
    the output directory whose leading '__'-separated token is a key of the
    returned mapping to '<symbol>-SashimiPlot_<rest>'.

    Raises:
        NameError: if no 'exp.' (non steady-state) file, or no counts file in
            the directory-path case, is found, because the corresponding
            local is then never assigned.
    """
    import errno
    global PSIFilename
    global outputdir
    global root_dir
    global steady_state_exp_file
    global species
    global sample_group_db

    species = Species
    try:
        countinp = fl.CountsFile()
        root_dir = fl.RootDir()
    except Exception:
        ### fl does not provide the accessor methods; use it as the root directory
        root_dir = fl
        search_dir = root_dir+'/ExpressionInput'
        for entry in unique.read_directory(search_dir):
            if 'counts.' in entry and 'steady-state.txt' not in entry:
                countinp = search_dir+'/'+entry

    PSIFilename = root_dir+'/AltResults/AlternativeOutput/'+species+'_RNASeq_top_alt_junctions-PSI.txt'

    import ExpressionBuilder
    expression_dir = root_dir+'/ExpressionInput'
    for entry in unique.read_directory(expression_dir):
        if 'exp.' not in entry:
            continue
        if 'steady-state' in entry:
            steady_state_exp_file = expression_dir+'/'+entry
        else:
            exp_file = expression_dir+'/'+entry
    sample_group_db = ExpressionBuilder.simplerGroupImport(exp_file)

    outputdir = root_dir+'/SashimiPlots'
    try: os.mkdir(unique.filepath(outputdir))
    except Exception: pass ### best effort, as before (e.g. the directory is already there)

    ### Record whether the plots should be displayed; the file is closed even on error
    with open(outputdir+'/show.txt','w') as show_flag:
        show_flag.write('TRUE' if show else 'FALSE')

    geneSymbol_db=Sashimiplottting(bamdir,countinp,PSIFilename,eventsToVisualizeFilename,events=events)

    for filename in os.listdir(outputdir):
        if '.pdf' not in filename and '.png' not in filename:
            continue
        gene = filename.replace('.pdf','').replace('.png','').split('__')[0]
        if gene not in geneSymbol_db:
            continue
        new_filename = str(filename)
        for separator in ('__','\\','/'):
            if separator in filename:
                new_filename = filename.split(separator)[1]
                break
        nnname = geneSymbol_db[gene][0]+'-SashimiPlot_'+new_filename
        source_path = os.path.join(outputdir,filename)
        target_path = os.path.join(outputdir,nnname)
        try: os.rename(source_path,target_path)
        except Exception as rename_error:
            ### Detect "destination exists" by errno rather than by message text
            ### alone, so the check does not depend on the OS message wording.
            collision = (getattr(rename_error,'errno',None) == errno.EEXIST
                         or 'already exists' in str(rename_error))
            if collision:
                ### File already exists, delete the old one
                try: os.remove(target_path)
                except Exception: pass
                ### Now write the new one
                try: os.rename(source_path,target_path)
                except Exception: pass
    print('')
def matrixImport(filename):
    """Import a tab-delimited value matrix and bucket each row's values by group.

    The first line is the header. When it contains ':' every sample label
    must look like 'group:sample'; otherwise the sample -> group mapping is
    loaded from a 'groups.*.txt' file in the ExpressionInput directory derived
    from `filename` (the part before 'AltResults'), and header samples that
    are missing from that mapping are ignored.

    Returns:
        (matrix, compared_groups, original_data) where
        matrix[row_key] is a list with one entry per group (first-seen order),
        each a list of the row's float values for that group; cells equal to
        '0', non-numeric cells and cells missing from a short row are skipped.
        compared_groups[row_key] lists the groups with more than one value.
        original_data maps 'header' and every row key to the raw input line
        (row lines are guaranteed to end with a newline).
    """
    matrix={}
    compared_groups={} ### track which values correspond to which groups for pairwise group comparisons
    original_data={}
    group_db={} ### group name -> indexes into a data row with the key column removed
    groups=[] ### group names in first-seen order
    try:
        handle = open(filename,'rU')
    except ValueError:
        ### Python >= 3.11 rejects the 'U' flag; text mode already handles newlines there
        handle = open(filename,'r')
    with handle: ### close the file even if parsing fails
        headerRow=True
        for line in handle:
            original_line = line
            data = line.rstrip()
            values = data.split('\t')
            if headerRow:
                headerRow = False
                original_data['header'] = original_line
                samples = values[1:]
                if ':' in data:
                    sample_groups=[]
                    for label in samples:
                        g,_sample = label.split(':') ### exactly one ':' per label is required
                        sample_groups.append(g)
                else:
                    import ExpressionBuilder
                    search_dir = filename.split('AltResults')[0]+'ExpressionInput'
                    ### sample_group_db is only bound when a matching groups file is present
                    for entry in unique.read_directory(search_dir):
                        if 'groups.' in entry and '.txt' in entry:
                            sample_group_db = ExpressionBuilder.simplerGroupImport(search_dir+'/'+entry)
                    ### None marks a sample without a group; its column is skipped below
                    sample_groups = [sample_group_db[s] if s in sample_group_db else None for s in samples]
                ### Data rows are indexed after the key column is removed, so the first
                ### sample is index 0. (The ':' branch used to start at 1, which read the
                ### neighbouring column and dropped the last one.)
                for index, s in enumerate(samples):
                    if ':' not in data and s not in sample_group_db:
                        continue
                    g = sample_groups[index]
                    group_db.setdefault(g, []).append(index)
                    if g not in groups: groups.append(g)
            else:
                key = values[0]
                values = values[1:]
                grouped_floats=[]
                associated_groups=[]
                for g in groups:
                    gvalues_list=[]
                    for i in group_db[g]:
                        try: cell = values[i]
                        except IndexError: continue ### short row: treat as a missing value
                        if cell != '0':
                            try: gvalues_list.append(float(cell))
                            except ValueError: pass ### non-numeric cell, skip it
                    grouped_floats.append(gvalues_list)
                    if len(gvalues_list)>1:
                        associated_groups.append(g)
                matrix[key] = grouped_floats
                compared_groups[key] = associated_groups
                if '\n' not in original_line: original_line+='\n'
                original_data[key] = original_line
    return matrix,compared_groups,original_data
def matrixImport(filename):
    """Import a tab-delimited value matrix and bucket each row's values by group.

    The first line is the header. When it contains ':' every sample label
    must look like 'group:sample'; otherwise the sample -> group mapping is
    loaded from a 'groups.*.txt' file in the ExpressionInput directory derived
    from `filename` (the part before 'AltResults'), and header samples that
    are missing from that mapping are ignored.

    Returns:
        (matrix, compared_groups, original_data) where
        matrix[row_key] is a list with one entry per group (first-seen order),
        each a list of the row's float values for that group; cells equal to
        '0', non-numeric cells and cells missing from a short row are skipped.
        compared_groups[row_key] lists the groups with more than one value.
        original_data maps 'header' and every row key to the raw input line
        (row lines are guaranteed to end with a newline).
    """
    matrix={}
    compared_groups={} ### track which values correspond to which groups for pairwise group comparisons
    original_data={}
    group_db={} ### group name -> indexes into a data row with the key column removed
    groups=[] ### group names in first-seen order

    def _register(group_name, position):
        ### Remember that the value at `position` belongs to `group_name`
        if group_name in group_db: group_db[group_name].append(position)
        else: group_db[group_name] = [position]
        if group_name not in groups: groups.append(group_name)

    try:
        handle = open(filename,'rU')
    except ValueError:
        ### Python >= 3.11 rejects the 'U' flag; text mode already handles newlines there
        handle = open(filename,'r')
    with handle: ### close the file even if parsing fails
        headerRow=True
        for line in handle:
            original_line = line
            data = line.rstrip()
            values = data.split('\t')
            if headerRow:
                headerRow = False
                original_data['header'] = original_line
                samples = values[1:]
                ### Data rows are indexed after the key column is removed, so the first
                ### sample is position 0 in both branches. (The ':' branch used to start
                ### at 1, which read the neighbouring column and dropped the last one.)
                if ':' in data:
                    for position, label in enumerate(samples):
                        g,_sample = label.split(':') ### exactly one ':' per label is required
                        _register(g, position)
                else:
                    import ExpressionBuilder
                    search_dir = filename.split('AltResults')[0]+'ExpressionInput'
                    ### sample_group_db is only bound when a matching groups file is present
                    for entry in unique.read_directory(search_dir):
                        if 'groups.' in entry and '.txt' in entry:
                            sample_group_db = ExpressionBuilder.simplerGroupImport(search_dir+'/'+entry)
                    for position, s in enumerate(samples):
                        if s in sample_group_db:
                            _register(sample_group_db[s], position)
            else:
                key = values[0]
                values = values[1:]
                grouped_floats=[]
                associated_groups=[]
                for g in groups:
                    gvalues_list=[]
                    for i in group_db[g]:
                        try: cell = values[i]
                        except IndexError: continue ### short row: treat as a missing value
                        if cell != '0':
                            try: gvalues_list.append(float(cell))
                            except ValueError: pass ### non-numeric cell, skip it
                    grouped_floats.append(gvalues_list)
                    if len(gvalues_list)>1:
                        associated_groups.append(g)
                matrix[key] = grouped_floats
                compared_groups[key] = associated_groups
                if '\n' not in original_line: original_line+='\n'
                original_data[key] = original_line
    return matrix,compared_groups,original_data