def convert_to_bp(alelle, data, LIZ_500):
    """Convert a trace index into a fragment size by ladder interpolation.

    The last (up to) 16 peaks that ``findpeaks.findpeaks`` reports for
    ``data`` are taken highest position first, and the c-th of them is
    paired with ``LIZ_500[c]``.  The first ladder peak at or below
    ``alelle`` is used, together with the ladder peak before it in that
    list, for a linear interpolation.

    Args:
        alelle: Trace index of the peak to be sized.
        data: Ladder trace; any iterable, it is copied into a list.
        LIZ_500: Indexable table of ladder sizes, where entry ``c`` belongs
            to the c-th ladder peak counted from the end of the peak list.

    Returns:
        The interpolated size, or 0 when ``alelle`` lies below every ladder
        peak that was detected (including when no peak was detected).
    """
    ladder_trace = list(data)
    peaks = findpeaks.findpeaks(ladder_trace, spacing=50, limit=200)
    n_peaks = len(peaks)

    # Last 16 peaks (fewer when fewer exist), walking from the end backwards.
    ind = [peaks[j] for j in range(n_peaks - 1, max(n_peaks - 17, -1), -1)]

    # Visit only the ladder peaks that exist.  A fixed range(16) used to raise
    # IndexError for a short ladder once the search ran past the last detected
    # peak; such alleles now get the same 0 as any allele below the ladder.
    for c in range(len(ind)):
        if alelle >= ind[c]:
            # NOTE(review): for c == 0 the index c - 1 is -1, so the slope is
            # taken between the first and the last entry of ``ind`` (and of
            # ``LIZ_500``, if it is a list/tuple).  With fewer than 16 peaks
            # those two "last" entries may not correspond - confirm intent.
            index_per_bp = (ind[c - 1] - ind[c]) / (LIZ_500[c - 1] - LIZ_500[c])
            return (alelle - ind[c]) / index_per_bp + LIZ_500[c]
    return 0
def convert_to_bp(alelle, data, LIZ_500):
    """Convert a trace index into a fragment size using a ratio-filtered ladder.

    Ladder peaks are collected from the end of the ``findpeaks`` result
    backwards.  A peak is accepted when the distance to an earlier peak,
    divided by the difference between consecutive ``LIZ_500`` entries, lies
    between 8.5 and 13.5.  The accepted peaks are then paired with
    ``LIZ_500`` by position and ``alelle`` is linearly interpolated.

    Args:
        alelle: Trace index of the peak to be sized.
        data: Ladder trace; copied into a list before peak detection.
        LIZ_500: Indexable table of ladder sizes; entries ``k`` and ``k + 1``
            are subtracted to scale each peak spacing.

    Returns:
        The interpolated size, or 0 when ``alelle`` is below all 16 ladder
        peaks that are inspected.
    """
    data_105 = list(data)
    i = len(data)  # not used again in this function
    indexes = findpeaks.findpeaks(data_105, spacing=35, limit=100)
    ind = []  # accepted ladder peaks, in the order they are accepted
    j = len(indexes) - 1  # position in ``indexes``; starts at the last peak
    k = 0  # number of peaks accepted so far / current LIZ_500 position
    # print(indexes)
    while j >= 0:
        if k == 15:
            # 15 peaks accepted already: take this one as the 16th and stop.
            ind.append(indexes[j])
            break
        counter = 1
        next_index = j - counter
        # NOTE(review): when j == 0, next_index is -1; for a list or NumPy
        # array that reads the LAST peak rather than failing - confirm.
        # Peak spacing in trace points per unit of ladder-size difference.
        del_ratio = (indexes[j] - indexes[next_index]) / (LIZ_500[k] - LIZ_500[k + 1])
        # print(del_ratio)
        in_range = del_ratio > 8.5 and del_ratio < 13.5
        if in_range:
            # The directly preceding peak gives a plausible spacing.
            ind.append(indexes[j])
            k += 1
            j -= 1
        else:
            # Try peaks further back until the ratio falls inside
            # [8.5, 13.5] (the bounds themselves end this loop).
            while del_ratio < 8.5 or del_ratio > 13.5:
                counter += 1
                next_index = j - counter
                if next_index < 0:
                    # No partner left for this j: drop it and restart the
                    # search from the peak before it.  j itself is not
                    # checked against 0 here.
                    j -= 1
                    counter = 1
                    next_index = j - counter
                del_ratio = (indexes[j] - indexes[next_index]) / (
                    LIZ_500[k] - LIZ_500[k + 1])
            ind.append(indexes[j])
            # Continue from the partner peak; peaks in between are skipped.
            j = next_index
            k += 1
    # print(ind)
    # Always inspects 16 entries, so ``ind`` must hold 16 peaks unless an
    # earlier entry already matches.
    for c in range(0, 16):
        if alelle > ind[c] or alelle == ind[c]:
            # For c == 0 the index c - 1 is -1, i.e. the last entry of ``ind``.
            bp_pred = ((alelle - ind[c]) / ((ind[c - 1] - ind[c]) / (LIZ_500[c - 1] - LIZ_500[c]))) + LIZ_500[c]
            break
        else:
            # NOTE(review): the source layout was lost; whether this ``else``
            # belongs to the ``if`` or the ``for`` gives the same result.
            bp_pred = 0
    return bp_pred
def find_lower(data, dye):
    """Return the earliest of the last 16 peaks detected in ``data``.

    Peaks come from ``findpeaks.findpeaks(..., spacing=50, limit=200)``.
    With 16 or more peaks the 16th counted from the end is returned; with
    fewer, the first detected peak is returned.

    Args:
        data: Trace to search; any iterable, it is copied into a list.
        dye: Not used by this function; kept so existing calls keep working.

    Returns:
        The selected entry of the peak list.

    Raises:
        IndexError: If no peak is detected.
    """
    trace = list(data)
    peaks = findpeaks.findpeaks(trace, spacing=50, limit=200)
    n_peaks = len(peaks)
    if n_peaks == 0:
        # Same exception type as the former empty-list lookup, with a
        # message that names the actual problem.
        raise IndexError("find_lower: no peaks detected in the trace")
    # Direct lookup replaces building the reversed 16-element list only to
    # read its final element.
    return peaks[max(n_peaks - 16, 0)]
def find_upper(data, LIZ_500):
    """Return the first ladder peak accepted by the ratio filter.

    Peaks are walked from the end of the ``findpeaks`` result backwards.  A
    peak is accepted when its distance to an earlier peak, divided by the
    difference between consecutive ``LIZ_500`` entries, lies between 8.5 and
    13.5.  The first accepted peak is returned.

    Args:
        data: Ladder trace; copied into a list before peak detection.
        LIZ_500: Indexable table of ladder sizes; entries ``k`` and ``k + 1``
            are subtracted to scale each peak spacing.

    Returns:
        ``ind[0]``, the first peak appended to the accepted list.
    """
    data_105 = list(data)
    i = len(data)  # not used again in this function
    indexes = findpeaks.findpeaks(data_105, spacing=35, limit=100)
    ind = []  # accepted ladder peaks, in the order they are accepted
    j = len(indexes) - 1  # position in ``indexes``; starts at the last peak
    k = 0  # number of peaks accepted so far / current LIZ_500 position
    while j >= 0:
        if k == 15:
            # 15 peaks accepted already: take this one as the 16th and stop.
            ind.append(indexes[j])
            break
        counter = 1
        next_index = j - counter
        # NOTE(review): when j == 0, next_index is -1; for a list or NumPy
        # array that reads the LAST peak rather than failing - confirm.
        del_ratio = (indexes[j] - indexes[next_index]) / (LIZ_500[k] - LIZ_500[k + 1])
        # print(del_ratio)
        in_range = del_ratio > 8.5 and del_ratio < 13.5
        if in_range:
            ind.append(indexes[j])
            k += 1
            j -= 1
        else:
            # Try peaks further back until the ratio falls inside
            # [8.5, 13.5] (the bounds themselves end this loop).
            while del_ratio < 8.5 or del_ratio > 13.5:
                counter += 1
                next_index = j - counter
                if next_index < 0:
                    # No partner left for this j: drop it and restart the
                    # search from the peak before it.
                    j -= 1
                    counter = 1
                    next_index = j - counter
                del_ratio = (indexes[j] - indexes[next_index]) / (
                    LIZ_500[k] - LIZ_500[k + 1])
            ind.append(indexes[j])
            # Continue from the partner peak; peaks in between are skipped.
            j = next_index
            k += 1
    # NOTE(review): the source layout was lost; this check is placed after
    # the loop, which is the reading that keeps ``ind[0]`` meaningful.
    # It drops the last accepted peak when it equals the first one.  With a
    # single accepted peak this empties the list and the return below raises
    # IndexError - confirm that is acceptable.
    if ind[0] == ind[len(ind) - 1]:
        ind.pop(len(ind) - 1)
    return ind[0]
def convert_to_index(bp, data, LIZ_500):
    """Convert a fragment size into a trace index by ladder interpolation.

    The last (up to) 16 peaks that ``findpeaks.findpeaks`` reports for
    ``data`` are taken highest position first, and the c-th of them is
    paired with ``LIZ_500[c]``.  The first ladder size at or below ``bp``
    is used, together with the entry before it, for a linear interpolation.

    Args:
        bp: Fragment size to locate on the trace.
        data: Ladder trace; any iterable, it is copied into a list.
        LIZ_500: Indexable table of ladder sizes, where entry ``c`` belongs
            to the c-th ladder peak counted from the end of the peak list.

    Returns:
        The interpolated trace index, or 0 when ``bp`` is smaller than every
        ladder size that has a detected peak.  The 0 fallback mirrors what
        ``convert_to_bp`` returns for a position below the ladder.
    """
    ladder_trace = list(data)
    peaks = findpeaks.findpeaks(ladder_trace, spacing=50, limit=200)
    n_peaks = len(peaks)

    # Last 16 peaks (fewer when fewer exist), walking from the end backwards.
    ind = [peaks[j] for j in range(n_peaks - 1, max(n_peaks - 17, -1), -1)]

    # ">=" rather than ">": a size exactly equal to the smallest ladder entry
    # used to match nothing and left the result unbound (UnboundLocalError).
    # For every other entry the interpolated value is mathematically the same
    # as before, because the size sits on a segment end point either way.
    for c in range(len(ind)):
        if bp >= LIZ_500[c]:
            # NOTE(review): for c == 0 the index c - 1 is -1, so the slope is
            # taken between the first and the last entry of ``ind`` (and of
            # ``LIZ_500``, if it is a list/tuple) - confirm intent.
            bp_per_index = (LIZ_500[c - 1] - LIZ_500[c]) / (ind[c - 1] - ind[c])
            return (bp - LIZ_500[c]) / bp_per_index + ind[c]
    return 0
break alelle2_index = index_of_peaks[height.index(max(height))] if alelle2_index > alelle1_index: if alelle1_index+80>=alelle2_index: two_peaks = True else: two_peaks = False else: if alelle1_index-80<=alelle2_index: two_peaks = True else: two_peaks = False all_peaks = fp.findpeaks(data, spacing=25, limit=25) for i in range(len(all_peaks)-1): lower = all_peaks[i] - 80 upper = all_peaks[i] + 20 if lower < index_min or upper > index_max: continue if noise_count > 2320: if alelle1 != alelle2: if all_peaks[i] == alelle1_index or all_peaks[i] == alelle2_index: file_name.append(filename) area_of_peaks.append(np.trapz(data[lower:upper])) number_of_peaks.append(len(fp.findpeaks(data[lower:upper], spacing=5,limit=15))) length_in_bp.append(round(convert_to_bp(all_peaks[i], record.annotations['abif_raw'][e], dye))) height_of_peaks.append(data[all_peaks[i]])
dipFound = False while dipFound == False: slope = (channel[peak+1]-channel[peak])/((peak+1)-peak) if slope >= 0: dipFound = True plt.scatter(peak- a, channeldata[peak],color='red') peak -= 1 peak += 1 return(peak) currentfile = "A_BOH_12_12.fsa" my_dir="/home/bo/PGC/microsat/testdata/training/GetHeight/" record = SeqIO.read(my_dir+currentfile,"abi") channeldata = record.annotations['abif_raw']['DATA1'] smol_test = channeldata[a:b] all_peaks = fp.findpeaks(smol_test,spacing=5,limit=1000) print(all_peaks) print(leftwarddip(channeldata,all_peaks)) print(rightwarddip(channeldata,all_peaks)) plt.scatter(all_peaks,[channeldata[3350 + i] for i in all_peaks]) plt.plot(smol_test,'-o',markersize=4) plt.show()
def plotgraph(directory, filename, peakwindow, threshold=2000): filename = filename my_dir = directory threshold = 500 #Load Data from FSA file record = SeqIO.read(my_dir + filename, "abi") channeldata = np.array(record.annotations['abif_raw']['DATA1']) ladderdata = np.array(record.annotations['abif_raw']['DATA105']) #What if the user wants to reset the scan with lower threshold? #what if true peak is actually lower than the default 2000 threshold? #This while block allows the user to repeat the scanning procedure cond = False while cond == False: all_pk = fp.findpeaks(channeldata, spacing=25, limit=threshold) all_pk = [ a for a in all_pk if a > int(peakwindow[0]) and a < int(peakwindow[1]) ] peak_height = [channeldata[a] for a in all_pk] #What if no peaks is detected? #This block shows the user the graph and that nothing is detected if len(all_pk) == 0: print("No peaks found in channel with threshold=%s" % threshold) plt.plot(channeldata) plt.show() print("Peaks detected from channel: %s \n%s\n" % (len(all_pk), all_pk)) seg_ranges = [[x - 80, x + 40] for x in all_pk] #Segment Ranges seg_area = [np.trapz(channeldata[s[0]:s[1]]) for s in seg_ranges] pk_area = [ np.trapz(channeldata[Peakboundaries(channeldata, i)[0]: Peakboundaries(channeldata, i)[1]]) for i in all_pk ] stu_area = [seg_area[i] - pk_area[i] for i in range(len(seg_area))] print(f'peaks at {all_pk}') print(f'peak height {peak_height}') print(f'seg area {seg_area}') print(f'peak area {pk_area}') print(f'stutter area {stu_area}\n') for i in range(len(seg_ranges)): a, b = seg_ranges[i][0], seg_ranges[i][1] segment = np.array(channeldata[a:b]) # the points plt.style.use('seaborn') plt.subplot(2, len(seg_ranges), i + 1) plt.plot(list(range(a, b)), segment, color='blue') #peak+stutter plt.title(f'{i+1}\nlikelihood: 5%') plt.yticks([]) plt.ylim(0, max(peak_height) + 1000) #bottom plt.subplot(2, 1, 2) plt.plot(ladderdata, color='black', alpha=0.3) plt.plot(channeldata) plt.plot(all_pk, peak_height, 'o', 
color='red', markersize=5, label="Suggested Peaks") plt.ylim(0, max(peak_height) + 1000) plt.xlim(500, 7000) for i in range(len(seg_ranges)): plt.axvspan(seg_ranges[i][0], seg_ranges[i][1], color='m', alpha=0.5) plt.axvspan(0, peakwindow[0], color='black', alpha=0.3, label="Excluded from peak search") plt.axvspan(int(peakwindow[1]), len(channeldata), color='black', alpha=0.3) plt.axhline(y=0, color='k') #x axis line plt.title(filename) plt.legend() #show labels plt.show(block=False) #print the graph print("%s potential peaks detected. Please select peaks " % len(seg_ranges), end='') print( "separated by commas. \ne.g: 1,2 (heterozygous) or 1,1 (homozygous)\n" ) print("[0] -> Repeat scan with lower threshold") for i in range(len(seg_ranges)): print("[{}] -> {}".format(i + 1, seg_ranges[i])) #Asks for user input and select peaks to choose #Idea: add option to repeat findpeaks or manual [0] if desired peak is not detected. while True: sel_peaks = str(input("\n>")).split(',') sel_peaks = [int(p) - 1 for p in sel_peaks] if len(sel_peaks) == 1 and sel_peaks[0] == -1: #not all thres is detected, repeat scan threshold -= 500 print("Repeating scan with lowered threshold (-500)") break elif len(sel_peaks) > len(seg_ranges) or min( sel_peaks) + 1 < 0 or max(sel_peaks) + 1 > len(seg_ranges): print("Invalid input") else: cond = True #condition is fulfulled break print("Please close graph to continue.....") plt.show() #figure out a way to move these to the top # sel_peaks = [p-1 for p in sel_peaks] # not_height = [height[i] for i in range(len(height)) if i not in sel_peaks] # not_seg_area = [seg_area[i] for i in range(len(seg_area)) if i not in sel_peaks] # not_sel_peaks = [all_pk[i] for i in range(len(all_pk)) if i not in sel_peaks] # height = [height[i] for i in range(len(height)) if i in sel_peaks] # seg_area = [seg_area[i] for i in range(len(seg_area)) if i in sel_peaks] # sel_peaks = [all_pk[p] for p in sel_peaks] # pk_area = 
[np.trapz(channeldata[Peakboundaries(channeldata,i)[0]:Peakboundaries(channeldata,i)[1]]) # for i in sel_peaks] # pk_area_not = [np.trapz(channeldata[Peakboundaries(channeldata,i)[0]:Peakboundaries(channeldata,i)[1]]) # for i in not_sel_peaks] # stu_area = [seg_area[i] - pk_area[i] for i in range(len(seg_area))] # stu_area_not = [not_seg_area[i] - pk_area_not[i] for i in range(len(not_seg_area))] """
break alelle2_index = index_of_peaks[height.index(max(height))] if alelle2_index > alelle1_index: if alelle1_index + 80 >= alelle2_index: two_peaks = True else: two_peaks = False else: if alelle1_index - 80 <= alelle2_index: two_peaks = True else: two_peaks = False all_peaks = fp.findpeaks(data, spacing=25, limit=25) for i in range(len(all_peaks) - 1): lower = all_peaks[i] - 40 upper = all_peaks[i] + 40 if lower < index_min or upper > index_max: continue if noise_count >= 1268: if alelle1 != alelle2: if all_peaks[i] == alelle1_index or all_peaks[ i] == alelle2_index: file_name.append(filename) area_of_peaks.append(np.trapz(data[lower:upper])) # number_of_peaks.append(len(fp.findpeaks(data[lower:upper], spacing=5,limit=15))) length_in_bp.append(
# Collect peaks, window areas and sub-peak counts for one .fsa file.
# ``a``, ``dye``, ``SeqIO``, ``fp`` and ``np`` are not defined in the visible
# code and must already be in scope when this runs.
file = input('File to analyze: ')  # base name; '.fsa' is appended below
record = SeqIO.read(file + '.fsa', 'abi')
data = record.annotations['abif_raw'][a]  # trace stored under key ``a``
label = []
area_of_peaks = []
no_of_peaks = []
length = []
peaks = []
height = []
# Search bounds taken from the 'DATA105' trace.  ``dye`` is ignored by
# find_lower and is used by find_upper as its ladder size table.
index_35 = find_lower(record.annotations['abif_raw']['DATA105'], dye)
index_500 = find_upper(record.annotations['abif_raw']['DATA105'], dye)
detected_peaks = fp.findpeaks(data, spacing=25, limit=25)
# NOTE(review): range(len(...) - 1) never visits the last detected peak -
# confirm that this is intended.
for i in range(len(detected_peaks) - 1):
    # Window reaches 80 points before the peak and 20 points after it.
    lower_end_of_window = detected_peaks[i] - 80
    upper_end_of_window = detected_peaks[i] + 20
    # Skip peaks whose window extends outside [index_35, index_500].
    if lower_end_of_window < index_35 or upper_end_of_window > index_500:
        continue
    peaks.append(detected_peaks[i])
    # Trapezoidal integral of the trace over the window.
    area_of_peaks.append(
        np.trapz(data[lower_end_of_window:upper_end_of_window]))
    # Number of peaks found inside the window with a finer search.
    no_of_peaks.append(
        len(
            fp.findpeaks(data[lower_end_of_window:upper_end_of_window],
                         spacing=5,
                         limit=15)))