def eic_centroid_detector(self, rt, eic, max_eic):
    """Run first-derivative peak picking on one extracted ion chromatogram.

    All thresholds are read from ``self.chromatogram_settings``; baseline
    correction is always disabled for this call.

    Args:
        rt: Values passed as the domain (first argument) of the peak picker.
        eic: Signal values passed as the second argument of the peak picker.
        max_eic: Value forwarded in the "maximum signal" position of
            ``sp.peak_picking_first_derivative``.

    Returns:
        Whatever ``sp.peak_picking_first_derivative`` returns, unchanged.
    """
    settings = self.chromatogram_settings
    return sp.peak_picking_first_derivative(
        rt,
        eic,
        settings.peak_height_max_percent,
        settings.peak_max_prominence_percent,
        max_eic,
        settings.min_peak_datapoints,
        settings.peak_derivative_threshold,
        signal_threshold=settings.eic_signal_threshold,
        correct_baseline=False,
    )
def plot(self, ax=None, color="black", derivative=True, deriv_color='red'):  # pragma: no cover
    """Plot this peak's data points, plus its derivative or a centroid stick.

    When the peak has no parent mass spectrum nothing is drawn: a message is
    printed and ``None`` is returned.

    Args:
        ax: Matplotlib axes to draw on; the current axes are used when None.
        color: Colour of the data trace and of the centroid stick.
        derivative: When true and the parent is not centroid data, overlay
            the derivative of the abundance profile.
        deriv_color: Colour of the derivative trace.

    Returns:
        The axes that were drawn on, or None when there is no parent.
    """
    parent = self._ms_parent
    if not parent:
        print("Centroid Peak Object")
        return None

    # Imported lazily so matplotlib is only required when plotting.
    import matplotlib.pyplot as plt

    if ax is None:
        ax = plt.gca()

    window = slice(self.start_scan, self.final_scan)
    x = parent.mz_exp_profile[window]
    y = parent.abundance_profile[window]

    ax.plot(x, y, color=color, label="Data")
    ax.set(xlabel='m/z', ylabel='abundance')

    show_derivative = derivative and not parent.is_centroid
    if show_derivative:
        # NOTE(review): the data trace is sliced with start_scan/final_scan
        # while the derivative uses start_index/final_index + 1 -- confirm
        # both pairs describe the same window.
        dy = sp.derivate(
            parent.abundance_profile[self.start_index:self.final_index + 1])
        ax.plot(x, dy, c=deriv_color)
    else:
        # Vertical stick from zero up to the peak abundance at mz_exp.
        ax.plot((self.mz_exp, self.mz_exp), (0, self.abundance),
                color=color, label="Data")

    return ax
def centroid_detector(self, rt, tic):
    """Pick peaks on a total ion chromatogram with the first-derivative picker.

    Thresholds come from ``self.chromatogram_settings``; the signal threshold
    is the ``peak_height_min_percent`` setting and baseline correction is
    disabled.

    Args:
        rt: Values passed as the domain (first argument) of the peak picker.
        tic: Signal values; their maximum is passed as the maximum signal.

    Returns:
        Whatever ``sp.peak_picking_first_derivative`` returns, unchanged.
    """
    cfg = self.chromatogram_settings

    # Peak picking thresholds.
    min_points = cfg.min_peak_datapoints
    derivative_cutoff = cfg.peak_derivative_threshold
    min_signal = cfg.peak_height_min_percent

    # Baseline detection thresholds.
    prominence_cap = cfg.peak_max_prominence_percent
    height_cap = cfg.peak_height_max_percent

    tic_maximum = max(tic)
    return sp.peak_picking_first_derivative(
        rt,
        tic,
        height_cap,
        prominence_cap,
        tic_maximum,
        min_points,
        derivative_cutoff,
        signal_threshold=min_signal,
        correct_baseline=False,
    )
def calc_centroid(self, mass, abund, freq):
    """Detect peak apexes in a profile spectrum and register each as a peak.

    Candidate peaks come from ``sp.peak_picking_first_derivative``. For every
    candidate the apex is refined with ``self.find_apex_fit_quadratic``; the
    peak is stored through ``self.add_mspeak`` only when the refined m/z is
    truthy and ``self.check_prominence`` returns a truthy result.

    Args:
        mass: m/z values, used as the peak-picking domain.
        abund: Abundance values, used as the peak-picking signal.
        freq: Frequency values forwarded to the apex fit.

    Returns:
        None. Accepted peaks are added through ``self.add_mspeak``.
    """
    settings = self.mspeaks_settings
    height_cap = settings.peak_height_max_percent
    prominence_cap = settings.peak_max_prominence_percent
    min_points = settings.min_peak_datapoints
    derivative_cutoff = settings.peak_derivative_threshold

    max_abun = max(abund)

    def peak_height_diff(hi, li):
        """Height difference between two indexes as a percentage of the maximum abundance."""
        return ((abund[hi] - abund[li]) / max_abun) * 100

    len_signal = len(abund)

    # The second value returned by get_threshold is what gets passed to the
    # picker in the "maximum signal" position.
    signal_threshold, factor = self.get_threshold(abund)

    candidates = sp.peak_picking_first_derivative(
        mass,
        abund,
        height_cap,
        prominence_cap,
        factor,
        min_points,
        derivative_cutoff,
        signal_threshold=signal_threshold,
        correct_baseline=False,
        abun_norm=1,
        plot_res=False,
    )

    for indexes_tuple in candidates:
        apex_index = indexes_tuple[1]

        mz_exp_centroid, freq_centr, intes_centr = self.find_apex_fit_quadratic(
            mass, abund, freq, apex_index)
        if not mz_exp_centroid:
            continue

        peak_indexes = self.check_prominence(
            abund, apex_index, len_signal, peak_height_diff)
        if not peak_indexes:
            continue

        peak_resolving_power = self.calculate_resolving_power(
            abund, mass, apex_index)
        s2n = intes_centr / self.baselise_noise_std

        # The stored abundance is the raw value at the apex index, while the
        # signal-to-noise ratio uses the fitted intensity.
        self.add_mspeak(
            self.polarity,
            mz_exp_centroid,
            abund[apex_index],
            peak_resolving_power,
            s2n,
            indexes_tuple,
            exp_freq=freq_centr,
            ms_parent=self,
        )
def smooth_signal(self, signal):
    """Smooth ``signal`` using the smoothing options in the chromatogram settings.

    Args:
        signal: The values to smooth.

    Returns:
        Whatever ``sp.smooth_signal`` returns for the configured window
        length, smoothing method, polynomial order and list of implemented
        methods.
    """
    cfg = self.chromatogram_settings
    return sp.smooth_signal(
        signal,
        cfg.smooth_window,
        cfg.smooth_method,
        cfg.savgol_pol_order,
        cfg.implemented_smooth_method,
    )
def smooth_signal(signal, parameters: LCMSParameters):
    """Smooth ``signal`` using the smoothing options under ``parameters.lc_ms``.

    Args:
        signal: The values to smooth.
        parameters: Settings object; only its ``lc_ms`` section is read.

    Returns:
        Whatever ``sp.smooth_signal`` returns for the configured window
        length, smoothing method, polynomial order and list of implemented
        methods.
    """
    lc_ms = parameters.lc_ms
    return sp.smooth_signal(
        signal,
        lc_ms.smooth_window,
        lc_ms.smooth_method,
        lc_ms.savgol_pol_order,
        lc_ms.implemented_smooth_method,
    )
def plot_detected_baseline(self, ax=None, color="blue"):  # pragma: no cover
    """Plot the baseline detected on the total ion chromatogram.

    The baseline is computed by ``sp.baseline_detector`` from ``self.tic`` and
    ``self.retention_time`` using the maximum height and maximum prominence
    settings, then drawn against retention time.

    Previously the computed baseline was discarded and only
    ``self.retention_time`` was handed to ``ax.plot``, which draws retention
    time against sample index rather than the baseline.

    Args:
        ax: Matplotlib axes to draw on; the current axes are used when None.
        color: Line colour of the baseline trace.

    Returns:
        The axes that were drawn on.
    """
    # Imported lazily so matplotlib is only required when plotting.
    import matplotlib.pyplot as plt

    if ax is None:
        ax = plt.gca()

    settings = self.chromatogram_settings
    max_height = settings.peak_height_max_percent
    max_prominence = settings.peak_max_prominence_percent

    baseline = sp.baseline_detector(
        self.tic, self.retention_time, max_height, max_prominence)

    # assumes sp.baseline_detector returns one value per retention time
    # point -- TODO confirm against its implementation.
    ax.plot(self.retention_time, baseline, color=color)
    ax.set(xlabel='Retention Time (s)', ylabel='Total Ion Chromatogram')

    return ax
def centroid_detector(self, tic, rt):
    """Build the legacy peak-detector generator for a chromatogram.

    This function has been replaced by ``sp.peak_picking_first_derivative``
    and is not used.

    Args:
        tic: Signal values passed as the first argument of the detector.
        rt: Retention time values passed as the fourth argument.

    Returns:
        Whatever ``sp.peak_detector_generator`` returns, unchanged.
    """
    settings = self.chromatogram_settings

    # Noise threshold options.
    noise_std = settings.std_noise_threshold
    noise_method = settings.noise_threshold_method

    # Peak picking.
    height_floor = settings.peak_height_min_percent
    min_points = settings.min_peak_datapoints

    # Baseline detection.
    prominence_cap = settings.peak_max_prominence_percent
    height_cap = settings.peak_height_max_percent

    return sp.peak_detector_generator(
        tic,
        noise_std,
        noise_method,
        rt,
        height_cap,
        height_floor,
        prominence_cap,
        min_points,
    )
def centroid_detector(self, tic, rt):
    """Create the peak-detector generator for a chromatogram.

    Noise, peak-picking and baseline thresholds are all read from
    ``self.chromatogram_settings``.

    Args:
        tic: Signal values passed as the first argument of the detector.
        rt: Retention time values passed as the fourth argument.

    Returns:
        Whatever ``sp.peak_detector_generator`` returns, unchanged.
    """
    cfg = self.chromatogram_settings
    detector_args = (
        tic,
        cfg.std_noise_threshold,
        cfg.noise_threshold_method,
        rt,
        # Baseline detection limits.
        cfg.peak_height_max_percent,
        # Peak picking limit.
        cfg.peak_height_min_percent,
        cfg.peak_max_prominence_percent,
        cfg.min_peak_datapoints,
    )
    return sp.peak_detector_generator(*detector_args)
def eic_centroid_detector(max_tic, eic_data: EIC_Data, parameters: LCMSParameters, smooth=True):
    """Pick peaks on one EIC and store the result on ``eic_data.apexes``.

    The signal is ``eic_data.eic``, passed through ``smooth_signal`` first
    when ``smooth`` is true. Peak picking uses the thresholds under
    ``parameters.lc_ms`` and the maximum of the (possibly smoothed) signal.

    Args:
        max_tic: Accepted but not read by this function.
        eic_data: Object providing ``time`` and ``eic``; its ``apexes``
            attribute is overwritten with the list of picked peaks.
        parameters: Settings object; its ``lc_ms`` section is read here and
            the whole object is forwarded to ``smooth_signal``.
        smooth: Whether to smooth the EIC before peak picking.

    Returns:
        None. Results are stored on ``eic_data``.
    """
    lc_ms = parameters.lc_ms
    prominence_cap = lc_ms.peak_max_prominence_percent
    height_cap = lc_ms.peak_height_max_percent
    min_signal = lc_ms.eic_signal_threshold
    min_points = lc_ms.min_peak_datapoints
    baseline_flag = lc_ms.correct_eic_baseline
    derivative_cutoff = lc_ms.peak_derivative_threshold

    eic_signal = smooth_signal(eic_data.eic, parameters) if smooth else eic_data.eic

    picked_peaks = sp.peak_picking_first_derivative(
        eic_data.time,
        eic_signal,
        height_cap,
        prominence_cap,
        max(eic_signal),
        min_points,
        derivative_cutoff,
        signal_threshold=min_signal,
        correct_baseline=baseline_flag,
        check_abundance=True,
        plot_res=False,
    )

    # Materialise the picker's output so it can be read more than once.
    eic_data.apexes = list(picked_peaks)
def centroid_detector(self, tic, rt):
    """Create the peak-detector generator for a chromatogram.

    Known limitations noted by the original author: the parameters need to
    change to accommodate EIC peak picking, and a better algorithm is needed
    to detect the start and end of a peak.

    Args:
        tic: Signal values passed as the first argument of the detector.
        rt: Retention time values passed as the fourth argument.

    Returns:
        Whatever ``sp.peak_detector_generator`` returns, unchanged.
    """
    options = self.chromatogram_settings

    noise_options = (options.std_noise_threshold, options.noise_threshold_method)

    # Peak picking.
    lowest_height = options.peak_height_min_percent
    fewest_points = options.min_peak_datapoints

    # Baseline detection.
    highest_prominence = options.peak_max_prominence_percent
    highest_height = options.peak_height_max_percent

    return sp.peak_detector_generator(
        tic,
        *noise_options,
        rt,
        highest_height,
        lowest_height,
        highest_prominence,
        fewest_points,
    )
def deconvolution(self, peaks_entity_data, plot_res):
    """Deconvolute the features found inside each TIC peak and add GC peaks.

    Peaks are first picked on ``self._processed_tic``; each one defines a
    retention-time window. Features from ``peaks_entity_data`` whose apex lies
    strictly inside the window are filtered, grouped with ``self.hc`` when
    there is more than one, summed per scan, smoothed and peak-picked again.
    Every apex found that way is registered through ``self.add_gcpeak``.

    The caller's ``plot_res`` flag is honoured. A leftover debugging line used
    to force it to True, so every call plotted and blocked on ``plt.show()``.

    Args:
        peaks_entity_data: Mapping of apex retention time to a feature
            mapping. Each feature mapping holds one entry per scan retention
            time (a dict with ``'mz'``, ``'abundance'`` and ``'scan_number'``
            lists) plus a ``"ref_apex_rt"`` entry.
        plot_res: Forwarded to ``self.add_gcpeak``; when true the processed
            TIC is also plotted and shown at the end.

    Returns:
        None. ``self.processed_appexes`` is reset and peaks are added through
        ``self.add_gcpeak``.
    """
    settings = self.chromatogram_settings
    max_height = settings.peak_height_max_percent
    max_prominence = settings.peak_max_prominence_percent
    min_peak_datapoints = settings.min_peak_datapoints
    signal_threshold = settings.peak_height_min_percent
    max_rt_distance = settings.max_rt_distance

    signal = self._processed_tic
    max_signal = max(signal)

    def merge_group(apex_rt_list):
        """Combine the per-scan data of every feature in one cluster."""
        group_datadict = {'ref_apex_rt': []}
        for each_group_apex_rt in apex_rt_list:
            datadict = peaks_entity_data.get(each_group_apex_rt)
            for rt, each_datadict in datadict.items():
                if rt == "ref_apex_rt":
                    group_datadict['ref_apex_rt'].append(each_datadict)
                elif rt in group_datadict:
                    # Scan already present: add abundances of shared m/z
                    # values and keep the m/z values unique to either side.
                    merged = dict(zip(each_datadict.get("mz"),
                                      each_datadict.get("abundance")))
                    previous = group_datadict[rt]
                    previous_abundance = previous.get("abundance")
                    for index_mz, mz in enumerate(previous.get("mz")):
                        if mz in merged:
                            merged[mz] = merged[mz] + previous_abundance[index_mz]
                        else:
                            merged[mz] = previous_abundance[index_mz]
                    group_datadict[rt] = {
                        'mz': list(merged.keys()),
                        'abundance': list(merged.values()),
                        'scan_number': each_datadict.get('scan_number'),
                    }
                else:
                    group_datadict[rt] = each_datadict
        return group_datadict

    def pick_and_add_peaks(features, start_rt, final_rt):
        """Sum abundances per scan, smooth, pick apexes and add one GC peak per apex."""
        rt_tic_pairs = sorted(
            (rt, sum(scan_data["abundance"]))
            for rt, scan_data in features.items()
            if rt != "ref_apex_rt"
        )
        peak_rt, peak_tic = zip(*rt_tic_pairs)
        smoothed_tic = self.smooth_signal(peak_tic)

        # Materialised first so a failure while picking never leaves only
        # part of the feature's peaks registered.
        feature_peaks = list(sp.peak_picking_first_derivative(
            peak_rt, smoothed_tic, max_height, max_prominence, max_signal,
            min_peak_datapoints, signal_threshold=signal_threshold,
            correct_baseline=False, plot_res=False))

        # Each element is handed to add_gcpeak as-is; one or many apexes are
        # treated the same way.
        for new_apex_index in feature_peaks:
            self.add_gcpeak(new_apex_index, start_rt, final_rt, peak_rt,
                            smoothed_tic, features, plot_res)

    tic_peaks = sp.peak_picking_first_derivative(
        self.retention_time, signal, max_height, max_prominence, max_signal,
        min_peak_datapoints, signal_threshold=signal_threshold,
        correct_baseline=False, plot_res=False)

    # The deconvolution window is defined by the TIC peak region.
    all_apexes_rt = np.array(list(peaks_entity_data.keys()))

    # Workaround for peak picking missing some local minima.
    self.processed_appexes = []

    for indexes_tuple in tic_peaks:
        start_rt = self.retention_time[indexes_tuple[0]]
        final_rt = self.retention_time[indexes_tuple[2]]

        # Find all features whose apex lies strictly inside the TIC window.
        inside_window = (all_apexes_rt > start_rt) & (all_apexes_rt < final_rt)

        filtered_features_rt = []
        filtered_features_abundance = []
        for each_apex_rt in all_apexes_rt[inside_window]:
            apex_data = peaks_entity_data.get(each_apex_rt).get(each_apex_rt)
            peak_features_tic = sum(apex_data.get('abundance'))
            norm_smooth_tic = (peak_features_tic / max_signal) * 100

            # TODO: improve peak filtering (peak sharpness, or peak fit and
            # Gaussian similarity). Currently a flat % TIC relative abundance
            # threshold plus more than one m/z at the apex scan.
            # NOTE(review): the original note said "min 3 m/z" while the
            # code requires more than 1 -- confirm the intended cutoff.
            if norm_smooth_tic > signal_threshold and len(apex_data['mz']) > 1:
                filtered_features_rt.append(each_apex_rt)
                filtered_features_abundance.append(peak_features_tic)

        if len(filtered_features_rt) > 1:
            # More than one feature inside the TIC peak: cluster them and
            # treat every cluster as one merged feature.
            grouped_rt = self.hc(filtered_features_rt,
                                 filtered_features_abundance,
                                 max_rt_distance=max_rt_distance)
            for apex_rt_list in grouped_rt.values():
                pick_and_add_peaks(merge_group(apex_rt_list), start_rt, final_rt)

        elif len(filtered_features_rt) == 1:
            # Only one feature inside the deconvolution window.
            datadict = peaks_entity_data.get(filtered_features_rt[0])
            pick_and_add_peaks(datadict, start_rt, final_rt)

        # Otherwise no feature survived the filter for this window.

    if plot_res:
        plt.plot(self.retention_time, self._processed_tic, c='black')
        plt.show()
def find_peaks_entity(self, eic_dict):
    """Combine the EIC peaks of every m/z that share the same apex retention time.

    Each EIC with at least ``min_peak_datapoints`` points is smoothed with
    ``self.smooth_tic`` and peak-picked. For every picked peak, the points
    from its start index up to (not including) its final index are stored
    under the peak's apex retention time.

    Args:
        eic_dict: Mapping of m/z to a sequence whose element 0 is the EIC
            signal and element 1 the matching retention times.

    Returns:
        dict: apex retention time -> feature mapping. A feature mapping has
        one entry per scan retention time (a dict with ``'mz'``,
        ``'abundance'`` and ``'scan_number'`` lists) and a ``"ref_apex_rt"``
        entry holding the interpolated apex time, rounded to 4 decimals, of
        the first peak that created the feature.
    """
    settings = self.chromatogram_settings
    prominence_cap = settings.peak_max_prominence_percent
    height_cap = settings.peak_height_max_percent
    min_signal = settings.eic_signal_threshold
    min_peak_datapoints = settings.min_peak_datapoints

    # Largest value over all EICs; starts at 0, so it is never negative.
    max_eic = 0
    for eic_scan_index_rt in eic_dict.values():
        ind_max_eic = max(eic_scan_index_rt[0])
        if ind_max_eic > max_eic:
            max_eic = ind_max_eic

    peaks_entity_data = {}

    for mz, eic_scan_index_rt in eic_dict.items():
        eic = eic_scan_index_rt[0]
        rt_list = eic_scan_index_rt[1]

        if len(eic) < min_peak_datapoints:
            continue

        smooth_eic = self.smooth_tic(eic)
        picked_peaks = sp.peak_picking_first_derivative(
            rt_list, smooth_eic, height_cap, prominence_cap, max_eic,
            min_peak_datapoints, signal_threshold=min_signal,
            correct_baseline=False)

        for initial_scan, apex_scan, final_scan in picked_peaks:
            rt_corrected_therm = self.quadratic_interpolation(
                rt_list, smooth_eic, apex_scan)
            apex_rt = rt_list[apex_scan]
            ref_apex_rt = round(apex_rt + rt_corrected_therm, 4)

            for scan_index in range(initial_scan, final_scan):
                peak_rt = rt_list[scan_index]
                peak_abundance = smooth_eic[scan_index]

                if apex_rt not in peaks_entity_data:
                    # First point seen for this apex: create the feature.
                    peaks_entity_data[apex_rt] = {
                        peak_rt: {
                            'mz': [mz],
                            'abundance': [peak_abundance],
                            'scan_number': [scan_index],
                        },
                        "ref_apex_rt": ref_apex_rt,
                    }
                    continue

                feature = peaks_entity_data[apex_rt]
                if peak_rt in feature:
                    # Scan already recorded: append this m/z to it.
                    existing_data = feature.get(peak_rt)
                    existing_data['mz'].append(mz)
                    existing_data['abundance'].append(peak_abundance)
                    existing_data['scan_number'].append(scan_index)
                else:
                    feature[peak_rt] = {
                        'mz': [mz],
                        'abundance': [peak_abundance],
                        'scan_number': [scan_index],
                    }

    return peaks_entity_data