def formatAndSubmitSplicingEventsToSashimiPlot(filename, bamdir, splicing_events, sample_group_db, groups, expandedSearch):
    """Submit the requested splicing events found in `filename` to SashimiPlot.

    `filename` is read as a tab-delimited table with one header row.  When
    'PSI' occurs in `filename`, every data row describes a junction pair
    (columns 3 and 4) and the sample values start at column 12.  Otherwise the
    first column is treated as a gene ID and the sample values start at
    column 2 ('.bed' is appended to the sample headers when it is missing).

    For each matching row the samples with the lowest values in every group
    are selected (5, 4, 2 or 1 per group as the number of groups grows; a
    single 'None' group is instead split into a 'low' and a 'high' half),
    passed to update_plot_settings() and the event is plotted with
    ssp.plot_event().  Plotting failures are ignored (best effort).

    Arguments:
        filename: path of the tab-delimited input file.
        bamdir: forwarded unchanged to update_plot_settings().
        splicing_events: iterable of event strings, either
            'junction1 junction2' or a single gene/junction ID.
        sample_group_db: maps each sample header to its group.
        groups: container of group names (iterated, sized and searched).
        expandedSearch: when true, a row also matches if only one of its
            junctions was requested.

    Returns:
        A list holding, for every plotted row, the event followed by its two
        junction (or gene) IDs.

    Uses names that are not defined in this function: unique, findParentDir,
    cleanUpLine, reorderEvents, update_plot_settings, ssp, outputdir,
    sampleReadDepth and count_sum_array_db.
    """
    import collections

    is_psi_file = 'PSI' in filename
    setting = unique.filepath("Config/sashimi_plot_settings.txt")
    psi_parent_dir = findParentDir(filename)
    if is_psi_file:
        index_dir = psi_parent_dir + "sashimi_index/"
    else:
        index_dir = string.split(psi_parent_dir, 'ExpressionInput')[0] + "AltResults/AlternativeOutput/sashimi_index/"

    # Individual junctions of the requested events, used by the expanded search.
    # A set gives constant-time membership tests inside the row loop.
    spliced_junctions = set()
    for splicing_event in splicing_events:
        try:
            j1, j2 = string.split(splicing_event, ' ')
        except Exception:
            spliced_junctions.add(splicing_event)  # single gene ID or junction
        else:
            spliced_junctions.add(j1)
            spliced_junctions.add(j2)

    if not is_psi_file:
        # Gene-level files: index the requested events by gene ID.
        splicing_events_db = {}
        for event in splicing_events:
            event = string.replace(event, ':', '__')
            if ' ' in event:
                event = string.split(event, ' ')[-1]
            gene = string.split(event, "__")[0]
            splicing_events_db.setdefault(gene, []).append(event)
        splicing_events = splicing_events_db

    analyzed_junctions = set()
    processed_events = []     # returned to the caller, order preserved
    processed_lookup = set()  # same content, for fast membership tests
    # Set to a list of group names to plot only those groups.
    restrictToTheseGroups = None
    firstLine = True

    # The context manager closes the input file even when a row raises.
    with open(filename, 'rU') as input_file:
        for line in input_file:
            line = cleanUpLine(line)
            t = string.split(line, '\t')

            if firstLine:
                firstLine = False
                if is_psi_file:
                    sampleIndexBegin = 11
                    sample_headers = t[sampleIndexBegin:]
                else:
                    sampleIndexBegin = 1
                    sample_headers = t[sampleIndexBegin:]
                    if '.bed' not in sample_headers[0]:  # add .bed if removed manually
                        sample_headers = [s + '.bed' for s in sample_headers]
                sample_group_index = {}
                for index, s in enumerate(sample_headers):
                    sample_group_index[index] = sample_group_db[s]
                    sampleReadDepth[index] = count_sum_array_db[s]
                continue

            if is_psi_file:
                j1 = t[2]
                j2 = t[3]
                splicing_event = j1 + ' ' + j2
                if j1 in analyzed_junctions and j2 in analyzed_junctions:
                    continue
            else:
                splicing_event = j1 = j2 = t[0]  # the gene ID
            if ":U" in splicing_event or "-U" in splicing_event:
                continue

            # First check whether the full event matches a requested entry;
            # if not, optionally look for an individual junction match.
            if not (splicing_event in splicing_events or (
                    expandedSearch and (j1 in spliced_junctions or j2 in spliced_junctions))):
                continue
            if splicing_event in processed_lookup or j2 in processed_lookup or j1 in processed_lookup:
                continue
            for entry in (splicing_event, j1, j2):
                processed_events.append(entry)
                processed_lookup.add(entry)

            if is_psi_file:
                analyzed_junctions.add(t[2])
                analyzed_junctions.add(t[3])
            else:  # for exp.dataset-steady-state.txt files
                geneID = splicing_event
                events = splicing_events[geneID]

            # Collect [value, sample index] pairs per group; values that are
            # not numeric (NULL entries) are ignored.
            initial_group_psi_values = {}
            for index, raw_value in enumerate(t[sampleIndexBegin:]):
                group = sample_group_index.get(index)
                try:
                    value = float(raw_value)
                except (TypeError, ValueError):
                    continue
                initial_group_psi_values.setdefault(group, []).append([value, index])

            if restrictToTheseGroups is not None:  # exclude unwanted groups
                initial_group_psi_values2 = {}
                groups2 = collections.OrderedDict()
                for group in groups:
                    if group in initial_group_psi_values and group in restrictToTheseGroups:
                        initial_group_psi_values2[group] = initial_group_psi_values[group]
                        groups2[group] = []
                initial_group_psi_values = initial_group_psi_values2
                groups = groups2

            # Keep the group order where an ordered mapping is available.
            try:
                group_psi_values = collections.OrderedDict()
            except Exception:
                try:
                    import ordereddict
                    group_psi_values = ordereddict.OrderedDict()
                except Exception:
                    group_psi_values = {}

            # Limit the number of samples reported, sorted on the values in each group.
            if 'None' in groups and len(groups) == 1:
                ranked = initial_group_psi_values['None']
                ranked.sort()
                half = len(ranked) // 2  # floor division: must be a valid slice bound
                group_psi_values['low'] = [entry[1] for entry in ranked[:half]]
                group_psi_values['high'] = [entry[1] for entry in ranked[half:]]
            else:
                group_count = len(groups)
                if group_count > 7:
                    samples_per_group = 1
                elif group_count > 5:
                    samples_per_group = 2
                elif group_count > 3:
                    samples_per_group = 4
                else:
                    samples_per_group = 5
                for group in groups:
                    if group in initial_group_psi_values:
                        ranked = initial_group_psi_values[group]
                        ranked.sort()
                        group_psi_values[group] = [entry[1] for entry in ranked[:samples_per_group]]

            try:
                update_plot_settings(bamdir, group_psi_values, sample_headers)
            except Exception:
                print('Cannot update the settings file. Likely permissions issue.')

            if is_psi_file:
                # The reordered pair is only needed for junction-level files,
                # so it is computed here rather than for every row type.
                junction_pair = t[2] + ' ' + t[3]
                try:
                    reordered = reorderEvents([junction_pair])
                    reordered = string.split(reordered[0], ' ')
                except Exception:
                    reordered = string.split(junction_pair, ' ')
                # Submit the query; failures are ignored (best effort).  Both
                # steps share one try so a failed replace can never cause a
                # previously formatted event to be plotted again.
                try:
                    formatted_splice_event = string.replace(reordered[1], ':', '__')
                    ssp.plot_event(formatted_splice_event, index_dir, setting, outputdir)
                except Exception:
                    pass
                # A second attempt with reordered[0] used to follow here; it is disabled.
            else:
                for event in events:
                    try:
                        ssp.plot_event(event, index_dir, setting, outputdir)
                    except Exception:
                        # If it fails, output the gene-level plot instead.
                        try:
                            ssp.plot_event(geneID, index_dir, setting, outputdir)
                        except Exception:
                            pass
    return processed_events
def formatAndSubmitSplicingEventsToSashimiPlot(
    filename, bamdir, splicing_events, sample_group_db, groups, expandedSearch
):
    """Submit the requested splicing events found in `filename` to SashimiPlot.

    `filename` is read as a tab-delimited table with one header row.  When
    "PSI" occurs in `filename`, every data row describes a junction pair
    (columns 3 and 4) and the sample values start at column 12.  Otherwise the
    first column is treated as a gene ID and the sample values start at
    column 2 (".bed" is appended to the sample headers when it is missing).

    For each matching row the samples with the lowest values in every group
    are selected (5, 4, 2 or 1 per group as the number of groups grows; a
    single "None" group is instead split into a "low" and a "high" half),
    passed to update_plot_settings() and the event is plotted with
    ssp.plot_event().  Plotting failures are ignored (best effort).

    Arguments:
        filename: path of the tab-delimited input file.
        bamdir: forwarded unchanged to update_plot_settings().
        splicing_events: iterable of event strings, either
            "junction1 junction2" or a single gene/junction ID.
        sample_group_db: maps each sample header to its group.
        groups: container of group names (iterated, sized and searched).
        expandedSearch: when true, a row also matches if only one of its
            junctions was requested.

    Returns:
        A list holding, for every plotted row, the event followed by its two
        junction (or gene) IDs.

    Uses names that are not defined in this function: unique, findParentDir,
    cleanUpLine, reorderEvents, update_plot_settings, ssp, outputdir,
    sampleReadDepth and count_sum_array_db.
    """
    import collections

    psi_input = "PSI" in filename
    setting = unique.filepath("Config/sashimi_plot_settings.txt")
    psi_parent_dir = findParentDir(filename)
    if psi_input:
        index_dir = psi_parent_dir + "sashimi_index/"
    else:
        index_dir = string.split(psi_parent_dir, "ExpressionInput")[0] + "AltResults/AlternativeOutput/sashimi_index/"

    # Individual junctions of the requested events, used by the expanded search.
    # A set gives constant-time membership tests inside the row loop.
    spliced_junctions = set()
    for splicing_event in splicing_events:
        try:
            j1, j2 = string.split(splicing_event, " ")
        except Exception:
            spliced_junctions.add(splicing_event)  # single gene ID or junction
        else:
            spliced_junctions.add(j1)
            spliced_junctions.add(j2)

    if not psi_input:
        # Gene-level files: index the requested events by gene ID.
        splicing_events_db = {}
        for event in splicing_events:
            event = string.replace(event, ":", "__")
            if " " in event:
                event = string.split(event, " ")[-1]
            gene = string.split(event, "__")[0]
            splicing_events_db.setdefault(gene, []).append(event)
        splicing_events = splicing_events_db

    analyzed_junctions = set()
    processed_events = []  # returned to the caller, order preserved
    already_processed = set()  # same content, for fast membership tests
    firstLine = True

    # The context manager closes the input file even when a row raises.
    with open(filename, "rU") as input_file:
        for line in input_file:
            line = cleanUpLine(line)
            t = string.split(line, "\t")

            if firstLine:
                firstLine = False
                if psi_input:
                    sampleIndexBegin = 11
                    sample_headers = t[sampleIndexBegin:]
                else:
                    sampleIndexBegin = 1
                    sample_headers = t[sampleIndexBegin:]
                    if ".bed" not in sample_headers[0]:  # add .bed if removed manually
                        sample_headers = [s + ".bed" for s in sample_headers]
                sample_group_index = {}
                for index, s in enumerate(sample_headers):
                    sample_group_index[index] = sample_group_db[s]
                    sampleReadDepth[index] = count_sum_array_db[s]
                continue

            if psi_input:
                j1 = t[2]
                j2 = t[3]
                splicing_event = j1 + " " + j2
                if j1 in analyzed_junctions and j2 in analyzed_junctions:
                    continue
            else:
                splicing_event = j1 = j2 = t[0]  # the gene ID
            if ":U" in splicing_event or "-U" in splicing_event:
                continue

            # First check whether the full event matches a requested entry;
            # if not, optionally look for an individual junction match.
            requested = splicing_event in splicing_events or (
                expandedSearch and (j1 in spliced_junctions or j2 in spliced_junctions)
            )
            if not requested:
                continue
            if splicing_event in already_processed or j2 in already_processed or j1 in already_processed:
                continue
            for entry in (splicing_event, j1, j2):
                processed_events.append(entry)
                already_processed.add(entry)

            if psi_input:
                analyzed_junctions.add(t[2])
                analyzed_junctions.add(t[3])
            else:  # for exp.dataset-steady-state.txt files
                geneID = splicing_event
                events = splicing_events[geneID]

            # Collect [value, sample index] pairs per group; values that are
            # not numeric (NULL entries) are ignored.
            initial_group_psi_values = {}
            for index, raw_value in enumerate(t[sampleIndexBegin:]):
                group = sample_group_index.get(index)
                try:
                    value = float(raw_value)
                except (TypeError, ValueError):
                    continue
                initial_group_psi_values.setdefault(group, []).append([value, index])

            # Keep the group order where an ordered mapping is available.
            try:
                group_psi_values = collections.OrderedDict()
            except Exception:
                try:
                    import ordereddict

                    group_psi_values = ordereddict.OrderedDict()
                except Exception:
                    group_psi_values = {}

            # Limit the number of samples reported, sorted on the values in each group.
            if "None" in groups and len(groups) == 1:
                ranked = initial_group_psi_values["None"]
                ranked.sort()
                half = len(ranked) // 2  # floor division: must be a valid slice bound
                group_psi_values["low"] = [entry[1] for entry in ranked[:half]]
                group_psi_values["high"] = [entry[1] for entry in ranked[half:]]
            else:
                group_count = len(groups)
                if group_count > 7:
                    samples_per_group = 1
                elif group_count > 5:
                    samples_per_group = 2
                elif group_count > 3:
                    samples_per_group = 4
                else:
                    samples_per_group = 5
                for group in groups:
                    if group in initial_group_psi_values:
                        ranked = initial_group_psi_values[group]
                        ranked.sort()
                        group_psi_values[group] = [entry[1] for entry in ranked[:samples_per_group]]

            try:
                update_plot_settings(bamdir, group_psi_values, sample_headers)
            except Exception:
                print("Cannot update the settings file. Likely permissions issue.")

            if psi_input:
                # The reordered pair is only needed for junction-level files,
                # so it is computed here rather than for every row type.
                junction_pair = t[2] + " " + t[3]
                try:
                    reordered = reorderEvents([junction_pair])
                    reordered = string.split(reordered[0], " ")
                except Exception:
                    reordered = string.split(junction_pair, " ")
                # Submit the query; failures are ignored (best effort).  Both
                # steps share one try so a failed replace can never cause a
                # previously formatted event to be plotted again.
                try:
                    formatted_splice_event = string.replace(reordered[1], ":", "__")
                    ssp.plot_event(formatted_splice_event, index_dir, setting, outputdir)
                except Exception:
                    pass
                # A second attempt with reordered[0] used to follow here; it is disabled.
            else:
                for event in events:
                    try:
                        ssp.plot_event(event, index_dir, setting, outputdir)
                    except Exception:
                        # If it fails, output the gene-level plot instead.
                        try:
                            ssp.plot_event(geneID, index_dir, setting, outputdir)
                        except Exception:
                            pass
    return processed_events
def sashmi_plot_list(bamdir,fname,gene_label,lines,samp,gene_sym):
    """Read splicing events from the file `fname` and plot each with SashimiPlot.

    The header row selects how events are extracted from the rows:
      * a 'junctionID-1' column          -> 'ASPIRE' (junctionID-1/-2 columns)
      * an 'ANOVA' column                -> 'PSI' ('gene:j1|j2' in column 1)
      * 'independent confirmation'       -> 'confirmed' (column 2)
      * 'ANOVA' in the file name         -> 'ANOVA' ('x:j1|j2' in column 1)
    A first column that already contains both a space and a colon is used as
    the event regardless of the detected type.  Rows that do not fit the
    expected layout are skipped.

    Arguments:
        bamdir: forwarded unchanged to update_plot_settings().
        fname: path of the tab-delimited event file.
        gene_label: list searched for each event string.
        lines: rows indexed like `gene_label`; the values from column 12
            onward decide which sample indexes are below 0.8 and which are not.
        samp: its length provides the fallback sample indexes when an event
            cannot be looked up; also forwarded to update_plot_settings().
        gene_sym: maps an output file name to the prefix used when renaming it.

    Raises:
        NameError: when no event could be extracted from `fname` (the same
            exception type the previous undefined-name idiom produced).

    Uses names that are not defined in this function: cleanUpLine,
    findParentDir, unique, update_plot_settings, ssp, dem, inputpsi and
    outputdir.
    """
    splicing_events=[]
    event_type = None  # renamed from `type` to stop shadowing the builtin
    firstLine = True
    # The context manager closes the event file even when parsing raises.
    with open(fname,'rU') as event_file:
        for line in event_file:
            line = cleanUpLine(line)
            t = string.split(line,'\t')
            if firstLine:
                if 'junctionID-1' in t:
                    j1i = t.index('junctionID-1')
                    j2i = t.index('junctionID-2')
                    event_type='ASPIRE'
                if 'ANOVA' in t:
                    event_type='PSI'
                elif 'independent confirmation' in t:
                    event_type='confirmed'
                elif 'ANOVA' in fname:
                    event_type='ANOVA'
                firstLine=False
            # NOTE: the header row is passed through the same extraction as data rows.
            if ' ' in t[0] and ':' in t[0]:
                splicing_events.append(t[0])
            elif event_type=='ASPIRE':
                splicing_events.append(t[j1i] +' '+ t[j2i])
            elif event_type=='ANOVA':
                try:
                    a,b = string.split(t[0],'|')
                    a = string.join(string.split(a,':')[1:],':')
                    splicing_events.append(a +' '+ b)
                except (IndexError, ValueError):
                    pass  # row does not follow the expected layout
            elif event_type=='PSI':
                try:
                    j1,j2 = string.split(t[0],'|')
                    a,b,c = string.split(j1,':')
                    splicing_events.append(b+':'+c +' '+ j2)
                except (IndexError, ValueError):
                    pass  # row does not follow the expected layout
            elif event_type=='confirmed':
                try:
                    event_pair1 = string.split(t[1],'|')[0]
                    a,b,c,d = string.split(event_pair1,'-')
                    splicing_events.append(a+'-'+b +' '+ c+'-'+d)
                except (IndexError, ValueError):
                    pass  # row does not follow the expected layout

    if not splicing_events:
        # Explicit replacement for the former bare undefined-name statement.
        raise NameError('forceNoCompatibleEventsInFile: no compatible splicing events found in %s' % fname)

    # The trailing comma keeps the Python 2 "print without newline" behaviour.
    print('Exporting plots'),
    for li in splicing_events:
        if ":U" in li or "-U" in li:
            continue
        li=cleanUpLine(li)
        dem[0]=string.split(li,'\t')
        for key in dem:
            for event_pair in dem[key]:
                list1=[]  # sample indexes with a value below 0.8
                list2=[]  # sample indexes with a value of 0.8 or above
                try:
                    k=gene_label.index(event_pair)
                    lt=cleanUpLine(lines[k])
                    values=string.split(lt,'\t')[11:]
                    for x, value in enumerate(values):
                        if value=='':
                            continue
                        if float(value) < 0.8:
                            list1.append(x)
                        else:
                            list2.append(x)
                    if len(list1)>5:
                        list1=list1[1:5]
                    if len(list2)>5:
                        list2=list2[1:5]
                except Exception:
                    # Event unknown or values unreadable: fall back to all samples.
                    list1.extend(range(len(samp)))
                update_plot_settings(bamdir,list1,list2,samp)
                a=string.split(event_pair," ")
                # Plot the entry that contains a '-' when it is the second one.
                if '-' in a[1]:
                    ch1=a[1]
                else:
                    ch1=a[0]
                event=findParentDir(inputpsi)+"trial_index/"
                setting=unique.filepath("Config/sashimi_plot_settings.txt")
                ch1=string.replace(ch1,':','__')
                try:
                    os.makedirs(outputdir)
                except OSError:
                    pass  # directory already exists
                try:
                    ssp.plot_event(ch1,event,setting,outputdir)
                except Exception:
                    continue  # best effort: skip events that cannot be plotted

    # os.listdir() returns bare entry names, so both rename paths must be
    # anchored in outputdir instead of the current working directory.
    for filename in os.listdir(outputdir):
        newname=string.split(filename,'/')
        if newname[0] in gene_sym:
            new_path = gene_sym[newname[0]]+'-'+filename
            os.rename(os.path.join(outputdir,filename),os.path.join(outputdir,new_path))
def sashmi_plot_list(bamdir,fname,gene_label,lines,samp,gene_sym):
    """Plot every event listed in the open file `fname` with SashimiPlot.

    Each line of `fname` is cleaned with cleanUpLine(), split on tabs and
    stored as dem[0]; every entry of every list in `dem` is then plotted.
    Lines containing ':U' or '-U' are skipped.  Progress is printed.

    Arguments:
        bamdir: forwarded unchanged to update_plot_settings().
        fname: open file object; it is read completely and closed.
        gene_label: list searched for each event string.
        lines: rows indexed like `gene_label`; the values from column 12
            onward decide which sample indexes are below 0.8 and which are not.
        samp: its length provides the fallback sample indexes when an event
            cannot be looked up; also forwarded to update_plot_settings().
        gene_sym: maps an output file name to the prefix used when renaming it.

    Uses names that are not defined in this function: cleanUpLine,
    findParentDir, update_plot_settings, ssp, dem and inputpsi.
    """
    try:
        event_lines=fname.readlines()
    finally:
        fname.close()  # close the handle even when reading fails

    for li in event_lines:
        if ":U" in li or "-U" in li:
            continue
        li=cleanUpLine(li)
        print(li)
        dem[0]=string.split(li,'\t')
        print(dem[0])
        for key in dem:
            for event_pair in dem[key]:
                list1=[]  # sample indexes with a value below 0.8
                list2=[]  # sample indexes with a value of 0.8 or above
                try:
                    k=gene_label.index(event_pair)
                    lt=cleanUpLine(lines[k])
                    t=string.split(lt,'\t')
                    print(t)
                    t=t[11:]
                    print(t)
                    for x, value in enumerate(t):
                        print('%s %s' % (x, value))
                        if value=='':
                            continue
                        if float(value) < 0.8:
                            list1.append(x)
                        else:
                            list2.append(x)
                    if len(list1)>5:
                        list1=list1[1:5]
                    if len(list2)>5:
                        list2=list2[1:5]
                    print('%s %s' % (len(list1), len(list2)))
                except Exception:
                    # Event unknown or values unreadable: fall back to all samples.
                    list1.extend(range(len(samp)))
                update_plot_settings(bamdir,list1,list2,samp)
                a=string.split(event_pair," ")
                # Plot the entry that contains a '-' when it is the second one.
                if '-' in a[1]:
                    ch1=a[1]
                else:
                    ch1=a[0]
                event=findParentDir(inputpsi)+"trial_index/"
                setting="sashimi_plot_settings_2.txt"
                outputdir=findParentDir(inputpsi)+"sashimiplots_aspire"
                if not os.path.isdir(outputdir):
                    os.makedirs(outputdir)
                try:
                    ssp.plot_event(ch1,event,setting,outputdir)
                except Exception:
                    print("error2")
                    continue

    outputdir=findParentDir(inputpsi)+"sashimiplots_aspire"
    # The directory only exists once at least one event reached the plotting step.
    if os.path.isdir(outputdir):
        # os.listdir() returns bare entry names, so both rename paths must be
        # anchored in outputdir instead of the current working directory.
        for filename in os.listdir(outputdir):
            newname=string.split(filename,'/')
            print(newname[0])
            if newname[0] in gene_sym:
                new_path=gene_sym[newname[0]]+'-'+filename
                os.rename(os.path.join(outputdir,filename),os.path.join(outputdir,new_path))
def sashmi_plot_list(bamdir, fname, gene_label, lines, samp, gene_sym):
    """Read splicing events from the file `fname` and plot each with SashimiPlot.

    The header row selects how events are extracted from the rows:
      * a 'junctionID-1' column          -> 'ASPIRE' (junctionID-1/-2 columns)
      * an 'ANOVA' column                -> 'PSI' ('gene:j1|j2' in column 1)
      * 'independent confirmation'       -> 'confirmed' (column 2)
    A first column that already contains both a space and a colon is used as
    the event regardless of the detected type.  Rows that do not fit the
    expected layout are skipped.

    Arguments:
        bamdir: forwarded unchanged to update_plot_settings().
        fname: path of the tab-delimited event file.
        gene_label: list searched for each event string.
        lines: rows indexed like `gene_label`; the values from column 12
            onward decide which sample indexes are below 0.8 and which are not.
        samp: its length provides the fallback sample indexes when an event
            cannot be looked up; also forwarded to update_plot_settings().
        gene_sym: maps an output file name to the prefix used when renaming it.

    Raises:
        NameError: when no event could be extracted from `fname` (the same
            exception type the previous undefined-name idiom produced).

    Uses names that are not defined in this function: cleanUpLine,
    findParentDir, unique, update_plot_settings, ssp, dem, inputpsi and
    outputdir.
    """
    splicing_events = []
    event_type = None  # renamed from `type` to stop shadowing the builtin
    firstLine = True
    # The context manager closes the event file even when parsing raises.
    with open(fname, 'rU') as event_file:
        for line in event_file:
            line = cleanUpLine(line)
            t = string.split(line, '\t')
            if firstLine:
                if 'junctionID-1' in t:
                    j1i = t.index('junctionID-1')
                    j2i = t.index('junctionID-2')
                    event_type = 'ASPIRE'
                if 'ANOVA' in t:
                    event_type = 'PSI'
                elif 'independent confirmation' in t:
                    # Fixed: this was the comparison `type == 'confirmed'`,
                    # which left the 'confirmed' branch below unreachable.
                    event_type = 'confirmed'
                firstLine = False
            # NOTE: the header row is passed through the same extraction as data rows.
            if ' ' in t[0] and ':' in t[0]:
                splicing_events.append(t[0])
            elif event_type == 'ASPIRE':
                splicing_events.append(t[j1i] + ' ' + t[j2i])
            elif event_type == 'PSI':
                try:
                    j1, j2 = string.split(t[0], '|')
                    a, b, c = string.split(j1, ':')
                    splicing_events.append(b + ':' + c + ' ' + j2)
                except (IndexError, ValueError):
                    pass  # row does not follow the expected layout
            elif event_type == 'confirmed':
                try:
                    event_pair1 = string.split(t[1], '|')[0]
                    a, b, c, d = string.split(event_pair1, '-')
                    splicing_events.append(a + '-' + b + ' ' + c + '-' + d)
                except (IndexError, ValueError):
                    pass  # row does not follow the expected layout

    if not splicing_events:
        # Explicit replacement for the former bare undefined-name statement.
        raise NameError('forceNoCompatibleEventsInFile: no compatible splicing events found in %s' % fname)

    # The trailing comma keeps the Python 2 "print without newline" behaviour.
    print('Exporting plots'),
    for li in splicing_events:
        if ":U" in li or "-U" in li:
            continue
        li = cleanUpLine(li)
        dem[0] = string.split(li, '\t')
        for key in dem:
            for event_pair in dem[key]:
                list1 = []  # sample indexes with a value below 0.8
                list2 = []  # sample indexes with a value of 0.8 or above
                try:
                    k = gene_label.index(event_pair)
                    lt = cleanUpLine(lines[k])
                    values = string.split(lt, '\t')[11:]
                    for x, value in enumerate(values):
                        if value == '':
                            continue
                        if float(value) < 0.8:
                            list1.append(x)
                        else:
                            list2.append(x)
                    if len(list1) > 5:
                        list1 = list1[1:5]
                    if len(list2) > 5:
                        list2 = list2[1:5]
                except Exception:
                    # Event unknown or values unreadable: fall back to all samples.
                    list1.extend(range(len(samp)))
                update_plot_settings(bamdir, list1, list2, samp)
                a = string.split(event_pair, " ")
                # Plot the entry that contains a '-' when it is the second one.
                if '-' in a[1]:
                    ch1 = a[1]
                else:
                    ch1 = a[0]
                event = findParentDir(inputpsi) + "trial_index/"
                setting = unique.filepath("Config/sashimi_plot_settings.txt")
                try:
                    os.makedirs(outputdir)
                except OSError:
                    pass  # directory already exists
                try:
                    ssp.plot_event(ch1, event, setting, outputdir)
                except Exception:
                    continue  # best effort: skip events that cannot be plotted

    # os.listdir() returns bare entry names, so both rename paths must be
    # anchored in outputdir instead of the current working directory.
    for filename in os.listdir(outputdir):
        newname = string.split(filename, '/')
        if newname[0] in gene_sym:
            new_path = gene_sym[newname[0]] + '-' + filename
            os.rename(os.path.join(outputdir, filename), os.path.join(outputdir, new_path))