Example #1
0
def convert_to_bp(alelle, data, LIZ_500):
    """Convert a trace index into a fragment size by ladder interpolation.

    Peaks are located in ``data`` with ``findpeaks.findpeaks`` (spacing=50,
    limit=200).  Up to 16 of them, taken from the end of the peak list
    backwards, are paired position by position with ``LIZ_500``.  The first
    of those peaks found at or below ``alelle`` anchors a linear
    interpolation against the previous peak in that order.

    Args:
        alelle: Trace index to convert.
        data: Trace searched for ladder peaks; it is copied into a list
            before peak finding.
        LIZ_500: Indexable ladder sizes; entry ``c`` belongs to the ``c``-th
            peak counted from the end of the peak list (presumably base-pair
            sizes in descending order -- TODO confirm against callers).

    Returns:
        The interpolated size, or 0 when no detected ladder peak lies at or
        below ``alelle``.  The 0 result also applies when fewer than 16
        peaks were detected; a fixed 16-step scan would run off the end of
        the peak list with an IndexError in that situation.
    """
    peak_positions = findpeaks.findpeaks(list(data), spacing=50, limit=200)
    # Last (up to) 16 peaks, end of the peak list first.
    ind = peak_positions[::-1][:16]

    for c, peak in enumerate(ind):
        if alelle >= peak:
            # NOTE(review): for c == 0 the ``c - 1`` lookups wrap around to
            # the last element of ``ind`` and ``LIZ_500``; confirm that this
            # whole-ladder slope is the intended extrapolation.
            index_per_size = (ind[c - 1] - peak) / (LIZ_500[c - 1] -
                                                    LIZ_500[c])
            return (alelle - peak) / index_per_size + LIZ_500[c]

    return 0
Example #2
0
def convert_to_bp(alelle, data, LIZ_500):
    """Convert a trace index into a fragment size using a ratio-checked ladder.

    Peaks are located in ``data`` with ``findpeaks.findpeaks`` (spacing=35,
    limit=100) and walked from the end of the peak list backwards.  A peak
    is accepted as ladder entry ``k`` once an earlier peak is found whose
    index distance, divided by ``LIZ_500[k] - LIZ_500[k + 1]``, lies within
    8.5 .. 13.5; the walk then continues from that earlier peak.  The 16th
    entry is accepted without a ratio check.  ``alelle`` is then linearly
    interpolated between the first accepted peak at or below it and the
    previously accepted one.

    Args:
        alelle: Trace index to convert.
        data: Trace searched for ladder peaks; it is copied into a list
            before peak finding.
        LIZ_500: Indexable ladder sizes, at least 16 entries are read when a
            full ladder is matched (presumably base-pair sizes in descending
            order -- TODO confirm against callers).

    Returns:
        The interpolated size, or 0 when no accepted ladder peak lies at or
        below ``alelle``.  The 0 result also applies when fewer than 16
        peaks were accepted; a fixed 16-step scan would raise IndexError in
        that situation.
    """
    indexes = findpeaks.findpeaks(list(data), spacing=35, limit=100)
    ind = []
    j = len(indexes) - 1
    k = 0

    while j >= 0:
        if k == 15:
            # Sixteenth entry: no further LIZ_500 gap exists to check against.
            ind.append(indexes[j])
            break

        size_gap = LIZ_500[k] - LIZ_500[k + 1]
        counter = 1
        next_index = j - counter
        del_ratio = (indexes[j] - indexes[next_index]) / size_gap

        # A ratio that is already acceptable skips this loop entirely, which
        # leaves next_index == j - 1: the same step a dedicated "in range"
        # branch would take, so no separate branch is needed.
        # NOTE(review): next_index can be negative here (j == 0, or after the
        # reset below), in which case indexing wraps to the end of the peak
        # list.  That behaviour is kept unchanged; confirm what a ladder with
        # too few matching peaks should produce.
        while del_ratio < 8.5 or del_ratio > 13.5:
            counter += 1
            next_index = j - counter
            if next_index < 0:
                # No partner for peak j among earlier peaks: drop it and
                # retry with the next peak down as the candidate.
                j -= 1
                counter = 1
                next_index = j - counter
            del_ratio = (indexes[j] - indexes[next_index]) / size_gap

        ind.append(indexes[j])
        j = next_index
        k += 1

    for c, peak in enumerate(ind[:16]):
        if alelle >= peak:
            # NOTE(review): for c == 0 the ``c - 1`` lookups wrap around to
            # the last element of ``ind`` and ``LIZ_500``; confirm intended.
            index_per_size = (ind[c - 1] - peak) / (LIZ_500[c - 1] -
                                                    LIZ_500[c])
            return (alelle - peak) / index_per_size + LIZ_500[c]

    return 0
Example #3
0
def find_lower(data, dye):
    """Return the 16th peak of ``data`` counted from the end of the peak list.

    Peaks are located with ``findpeaks.findpeaks`` (spacing=50, limit=200).
    When fewer than 16 peaks are detected, the first detected peak is
    returned instead.

    Args:
        data: Trace searched for peaks; it is copied into a list before
            peak finding.
        dye: Not used by this function; kept so existing callers that pass
            it keep working.

    Returns:
        The selected entry of the peak list returned by ``findpeaks``.

    Raises:
        IndexError: If no peaks are detected.
    """
    indexes = findpeaks.findpeaks(list(data), spacing=50, limit=200)
    if len(indexes) == 0:
        raise IndexError("no peaks detected in size-standard trace")

    # Walking 16 peaks back from the end lands on len - 16, clamped to the
    # first peak when the list is shorter than that.
    return indexes[max(len(indexes) - 16, 0)]
Example #4
0
def find_upper(data, LIZ_500):
    """Return the first peak accepted by the ratio-checked ladder walk.

    Peaks are located in ``data`` with ``findpeaks.findpeaks`` (spacing=35,
    limit=100) and walked from the end of the peak list backwards.  A peak
    is accepted as ladder entry ``k`` once an earlier peak is found whose
    index distance, divided by ``LIZ_500[k] - LIZ_500[k + 1]``, lies within
    8.5 .. 13.5; the walk then continues from that earlier peak until 16
    entries are accepted or the peak list is exhausted.

    Args:
        data: Trace searched for ladder peaks; it is copied into a list
            before peak finding.
        LIZ_500: Indexable ladder sizes (presumably base-pair sizes in
            descending order -- TODO confirm against callers).

    Returns:
        The first accepted peak, i.e. the accepted peak closest to the end
        of the peak list.

    Raises:
        IndexError: If no peak was accepted (for example, no peaks were
            detected at all).
    """
    indexes = findpeaks.findpeaks(list(data), spacing=35, limit=100)
    ind = []
    j = len(indexes) - 1
    k = 0

    while j >= 0:
        if k == 15:
            # Sixteenth entry: no further LIZ_500 gap exists to check against.
            ind.append(indexes[j])
            break

        size_gap = LIZ_500[k] - LIZ_500[k + 1]
        counter = 1
        next_index = j - counter
        del_ratio = (indexes[j] - indexes[next_index]) / size_gap

        # A ratio that is already acceptable skips this loop entirely, which
        # leaves next_index == j - 1: the same step a dedicated "in range"
        # branch would take, so no separate branch is needed.
        # NOTE(review): next_index can be negative here (j == 0, or after the
        # reset below), in which case indexing wraps to the end of the peak
        # list.  That behaviour is kept unchanged; confirm what a ladder with
        # too few matching peaks should produce.
        while del_ratio < 8.5 or del_ratio > 13.5:
            counter += 1
            next_index = j - counter
            if next_index < 0:
                # No partner for peak j among earlier peaks: drop it and
                # retry with the next peak down as the candidate.
                j -= 1
                counter = 1
                next_index = j - counter
            del_ratio = (indexes[j] - indexes[next_index]) / size_gap

        ind.append(indexes[j])
        j = next_index
        k += 1

    if not ind:
        raise IndexError("no ladder peaks matched in size-standard trace")

    # Removing a trailing duplicate of ind[0] never changes ind[0] itself,
    # but with a single accepted peak it would empty the list and make the
    # lookup fail, so the first entry is returned directly.
    return ind[0]
Example #5
0
def convert_to_index(bp, data, LIZ_500):
    """Convert a fragment size into a trace index by ladder interpolation.

    Peaks are located in ``data`` with ``findpeaks.findpeaks`` (spacing=50,
    limit=200).  Up to 16 of them, taken from the end of the peak list
    backwards, are paired position by position with ``LIZ_500``.  The first
    ladder size strictly below ``bp`` anchors a linear interpolation against
    the previous ladder entry in that order.

    Args:
        bp: Fragment size to convert.
        data: Trace searched for ladder peaks; it is copied into a list
            before peak finding.
        LIZ_500: Indexable ladder sizes; entry ``c`` belongs to the ``c``-th
            peak counted from the end of the peak list (presumably base-pair
            sizes in descending order -- TODO confirm against callers).

    Returns:
        The interpolated trace index, or 0 when ``bp`` is not greater than
        any ladder size that has a detected peak.  Returning 0 mirrors the
        "no match" result of ``convert_to_bp``; without it the result
        variable would be unbound in that case.
    """
    peak_positions = findpeaks.findpeaks(list(data), spacing=50, limit=200)
    # Last (up to) 16 peaks, end of the peak list first.
    ind = peak_positions[::-1][:16]

    for c, peak in enumerate(ind):
        if bp > LIZ_500[c]:
            # NOTE(review): for c == 0 the ``c - 1`` lookups wrap around to
            # the last element of ``ind`` and ``LIZ_500``; confirm that this
            # whole-ladder slope is the intended extrapolation.
            size_per_index = (LIZ_500[c - 1] - LIZ_500[c]) / (ind[c - 1] -
                                                              peak)
            return (bp - LIZ_500[c]) / size_per_index + peak

    return 0
Example #6
0
					break

			alelle2_index = index_of_peaks[height.index(max(height))]

			if alelle2_index > alelle1_index:
				if alelle1_index+80>=alelle2_index:
					two_peaks = True
				else:
					two_peaks = False
			else:
				if alelle1_index-80<=alelle2_index:
					two_peaks = True
				else:
					two_peaks = False

		all_peaks = fp.findpeaks(data, spacing=25, limit=25)

		for i in range(len(all_peaks)-1):
			lower = all_peaks[i] - 80
			upper = all_peaks[i] + 20
			if lower < index_min or upper > index_max:
				continue

			if noise_count > 2320:
				if alelle1 != alelle2:
					if all_peaks[i] == alelle1_index or all_peaks[i] == alelle2_index:
						file_name.append(filename)
						area_of_peaks.append(np.trapz(data[lower:upper]))
						number_of_peaks.append(len(fp.findpeaks(data[lower:upper], spacing=5,limit=15)))
						length_in_bp.append(round(convert_to_bp(all_peaks[i], record.annotations['abif_raw'][e], dye)))
						height_of_peaks.append(data[all_peaks[i]])
Example #7
0
	
	dipFound = False
	while dipFound == False:
		slope = (channel[peak+1]-channel[peak])/((peak+1)-peak)
		if slope >= 0:
			dipFound = True
			plt.scatter(peak- a, channeldata[peak],color='red')
			peak -= 1

		peak += 1
	return(peak)

# Driver: load one ABI trace, find peaks in a slice of channel DATA1, print
# the dip positions reported by the helper functions and plot the result.
currentfile = "A_BOH_12_12.fsa"
my_dir="/home/bo/PGC/microsat/testdata/training/GetHeight/"
record = SeqIO.read(my_dir+currentfile,"abi")
channeldata = record.annotations['abif_raw']['DATA1']
# `a` and `b` are defined outside this view; they bound the slice under test.
smol_test = channeldata[a:b]

# Peaks are searched in the slice only, so the returned positions are
# presumably relative to the start of smol_test -- TODO confirm.
all_peaks = fp.findpeaks(smol_test,spacing=5,limit=1000)
print(all_peaks)


# NOTE(review): the helpers receive the full channel together with the whole
# all_peaks result; verify they accept this rather than a single peak index.
print(leftwarddip(channeldata,all_peaks))
print(rightwarddip(channeldata,all_peaks))

# NOTE(review): 3350 is hard-coded as the offset back into channeldata; it
# presumably has to equal `a` -- confirm.
plt.scatter(all_peaks,[channeldata[3350 + i] for i in all_peaks])
plt.plot(smol_test,'-o',markersize=4)
plt.show()


Example #8
0
def plotgraph(directory, filename, peakwindow, threshold=2000):
    filename = filename
    my_dir = directory
    threshold = 500

    #Load Data from FSA file
    record = SeqIO.read(my_dir + filename, "abi")
    channeldata = np.array(record.annotations['abif_raw']['DATA1'])
    ladderdata = np.array(record.annotations['abif_raw']['DATA105'])

    #What if the user wants to reset the scan with lower threshold?
    #what if true peak is actually lower than the default 2000 threshold?
    #This while block allows the user to repeat the scanning procedure
    cond = False
    while cond == False:

        all_pk = fp.findpeaks(channeldata, spacing=25, limit=threshold)
        all_pk = [
            a for a in all_pk
            if a > int(peakwindow[0]) and a < int(peakwindow[1])
        ]
        peak_height = [channeldata[a] for a in all_pk]

        #What if no peaks is detected?
        #This block shows the user the graph and that nothing is detected
        if len(all_pk) == 0:
            print("No peaks found in channel with threshold=%s" % threshold)
            plt.plot(channeldata)
            plt.show()

        print("Peaks detected from channel: %s \n%s\n" % (len(all_pk), all_pk))
        seg_ranges = [[x - 80, x + 40] for x in all_pk]  #Segment Ranges

        seg_area = [np.trapz(channeldata[s[0]:s[1]]) for s in seg_ranges]
        pk_area = [
            np.trapz(channeldata[Peakboundaries(channeldata, i)[0]:
                                 Peakboundaries(channeldata, i)[1]])
            for i in all_pk
        ]
        stu_area = [seg_area[i] - pk_area[i] for i in range(len(seg_area))]

        print(f'peaks at {all_pk}')
        print(f'peak height {peak_height}')
        print(f'seg area {seg_area}')
        print(f'peak area {pk_area}')
        print(f'stutter area {stu_area}\n')

        for i in range(len(seg_ranges)):
            a, b = seg_ranges[i][0], seg_ranges[i][1]
            segment = np.array(channeldata[a:b])

            # the points
            plt.style.use('seaborn')
            plt.subplot(2, len(seg_ranges), i + 1)
            plt.plot(list(range(a, b)), segment, color='blue')  #peak+stutter
            plt.title(f'{i+1}\nlikelihood: 5%')
            plt.yticks([])
            plt.ylim(0, max(peak_height) + 1000)

        #bottom
        plt.subplot(2, 1, 2)
        plt.plot(ladderdata, color='black', alpha=0.3)
        plt.plot(channeldata)
        plt.plot(all_pk,
                 peak_height,
                 'o',
                 color='red',
                 markersize=5,
                 label="Suggested Peaks")
        plt.ylim(0, max(peak_height) + 1000)
        plt.xlim(500, 7000)

        for i in range(len(seg_ranges)):
            plt.axvspan(seg_ranges[i][0],
                        seg_ranges[i][1],
                        color='m',
                        alpha=0.5)

        plt.axvspan(0,
                    peakwindow[0],
                    color='black',
                    alpha=0.3,
                    label="Excluded from peak search")
        plt.axvspan(int(peakwindow[1]),
                    len(channeldata),
                    color='black',
                    alpha=0.3)
        plt.axhline(y=0, color='k')  #x axis line
        plt.title(filename)
        plt.legend()  #show labels
        plt.show(block=False)  #print the graph

        print("%s potential peaks detected. Please select peaks " %
              len(seg_ranges),
              end='')
        print(
            "separated by commas. \ne.g: 1,2 (heterozygous) or 1,1 (homozygous)\n"
        )

        print("[0] -> Repeat scan with lower threshold")
        for i in range(len(seg_ranges)):
            print("[{}] -> {}".format(i + 1, seg_ranges[i]))

        #Asks for user input and select peaks to choose
        #Idea: add option to repeat findpeaks or manual [0] if desired peak is not detected.

        while True:
            sel_peaks = str(input("\n>")).split(',')
            sel_peaks = [int(p) - 1 for p in sel_peaks]
            if len(sel_peaks) == 1 and sel_peaks[0] == -1:
                #not all thres is detected, repeat scan
                threshold -= 500
                print("Repeating scan with lowered threshold (-500)")
                break
            elif len(sel_peaks) > len(seg_ranges) or min(
                    sel_peaks) + 1 < 0 or max(sel_peaks) + 1 > len(seg_ranges):
                print("Invalid input")
            else:
                cond = True  #condition is fulfulled
                break

        print("Please close graph to continue.....")
        plt.show()

    #figure out a way to move these to the top
    # sel_peaks = [p-1 for p in sel_peaks]

    # not_height = [height[i] for i in range(len(height)) if i not in sel_peaks]
    # not_seg_area = [seg_area[i] for i in range(len(seg_area)) if i not in sel_peaks]
    # not_sel_peaks = [all_pk[i] for i in range(len(all_pk)) if i not in sel_peaks]

    # height = [height[i] for i in range(len(height)) if i in sel_peaks]
    # seg_area = [seg_area[i] for i in range(len(seg_area)) if i in sel_peaks]
    # sel_peaks = [all_pk[p] for p in sel_peaks]

    # pk_area = [np.trapz(channeldata[Peakboundaries(channeldata,i)[0]:Peakboundaries(channeldata,i)[1]])
    # 	for i in sel_peaks]

    # pk_area_not = [np.trapz(channeldata[Peakboundaries(channeldata,i)[0]:Peakboundaries(channeldata,i)[1]])
    # 	for i in not_sel_peaks]

    # stu_area = [seg_area[i] - pk_area[i] for i in range(len(seg_area))]
    # stu_area_not = [not_seg_area[i] - pk_area_not[i] for i in range(len(not_seg_area))]
    """
Example #9
0
                    break

            alelle2_index = index_of_peaks[height.index(max(height))]

            if alelle2_index > alelle1_index:
                if alelle1_index + 80 >= alelle2_index:
                    two_peaks = True
                else:
                    two_peaks = False
            else:
                if alelle1_index - 80 <= alelle2_index:
                    two_peaks = True
                else:
                    two_peaks = False

        all_peaks = fp.findpeaks(data, spacing=25, limit=25)

        for i in range(len(all_peaks) - 1):
            lower = all_peaks[i] - 40
            upper = all_peaks[i] + 40
            if lower < index_min or upper > index_max:
                continue

            if noise_count >= 1268:
                if alelle1 != alelle2:
                    if all_peaks[i] == alelle1_index or all_peaks[
                            i] == alelle2_index:
                        file_name.append(filename)
                        area_of_peaks.append(np.trapz(data[lower:upper]))
                        # number_of_peaks.append(len(fp.findpeaks(data[lower:upper], spacing=5,limit=15)))
                        length_in_bp.append(
Example #10
0
# Interactive analysis of one trace: the user types the file name without
# its extension and '.fsa' is appended before reading it as an ABI file.
file = input('File to analyze: ')
record = SeqIO.read(file + '.fsa', 'abi')
# `a` (the raw-data key to analyse) is defined outside this view.
data = record.annotations['abif_raw'][a]

# Result lists; only some of them are filled in the part of the loop
# visible here.
label = []
area_of_peaks = []
no_of_peaks = []
length = []
peaks = []
height = []

# Bounds of the usable region, taken from the DATA105 channel by the
# find_lower / find_upper helpers.  `dye` is defined outside this view.
index_35 = find_lower(record.annotations['abif_raw']['DATA105'], dye)
index_500 = find_upper(record.annotations['abif_raw']['DATA105'], dye)

detected_peaks = fp.findpeaks(data, spacing=25, limit=25)

# NOTE(review): range(len(...) - 1) never visits the last detected peak;
# confirm that this is intentional.
for i in range(len(detected_peaks) - 1):
    # Window from 80 samples before the peak to 20 samples after it.
    lower_end_of_window = detected_peaks[i] - 80
    upper_end_of_window = detected_peaks[i] + 20
    # Skip peaks whose window reaches outside [index_35, index_500].
    if lower_end_of_window < index_35 or upper_end_of_window > index_500:
        continue

    peaks.append(detected_peaks[i])
    # Trapezoidal area under the trace inside the window.
    area_of_peaks.append(
        np.trapz(data[lower_end_of_window:upper_end_of_window]))
    # Number of finer-grained peaks found inside the same window.
    no_of_peaks.append(
        len(
            fp.findpeaks(data[lower_end_of_window:upper_end_of_window],
                         spacing=5,
                         limit=15)))