Beispiel #1
0
    def eic_centroid_detector(self, rt, eic, max_eic):
        """Pick peaks on an extracted ion chromatogram (EIC).

        The thresholds are read from ``self.chromatogram_settings`` and
        forwarded, together with the signal, to
        ``sp.peak_picking_first_derivative``. Baseline correction is always
        switched off here.

        Args:
            rt: domain values, passed as the first positional argument.
            eic: signal values, passed as the second positional argument.
            max_eic: value forwarded in the maximum-signal position.

        Returns:
            The result of ``sp.peak_picking_first_derivative``, unchanged.
        """
        settings = self.chromatogram_settings

        return sp.peak_picking_first_derivative(
            rt,
            eic,
            settings.peak_height_max_percent,
            settings.peak_max_prominence_percent,
            max_eic,
            settings.min_peak_datapoints,
            settings.peak_derivative_threshold,
            signal_threshold=settings.eic_signal_threshold,
            correct_baseline=False)
Beispiel #2
0
    def plot(self,
             ax=None,
             color="black",
             derivative=True,
             deriv_color='red'):  #pragma: no cover
        """Draw this peak's slice of the parent spectrum profile.

        Without a parent spectrum nothing is drawn: a short message is
        printed and ``None`` is returned.

        Args:
            ax: matplotlib axes to draw on; the current axes are used when
                ``None``.
            color: colour of the profile line (and of the stick, see below).
            derivative: when true and the parent is not centroid data, the
                derivative of the abundance profile is drawn as well.
            deriv_color: colour of the derivative line.

        Returns:
            The axes that were drawn on, or ``None`` when there is no parent.
        """
        parent = self._ms_parent

        if not parent:
            print("Centroid Peak Object")
            return None

        import matplotlib.pyplot as plt

        if ax is None:
            ax = plt.gca()

        scan_window = slice(self.start_scan, self.final_scan)
        x = parent.mz_exp_profile[scan_window]
        y = parent.abundance_profile[scan_window]

        ax.plot(x, y, color=color, label="Data")
        ax.set(xlabel='m/z', ylabel='abundance')

        show_derivative = derivative and not parent.is_centroid

        if show_derivative:
            # NOTE(review): the derivative window uses start_index/final_index
            # while x uses start_scan/final_scan - confirm both line up.
            derivative_window = slice(self.start_index, self.final_index + 1)
            dy = sp.derivate(parent.abundance_profile[derivative_window])
            ax.plot(x, dy, c=deriv_color)
        else:
            # Otherwise mark the peak as a vertical stick at its m/z.
            ax.plot((self.mz_exp, self.mz_exp), (0, self.abundance),
                    color=color,
                    label="Data")

        return ax
Beispiel #3
0
    def centroid_detector(self, rt, tic):
        """Pick peaks on a total ion chromatogram (TIC).

        Thresholds come from ``self.chromatogram_settings``; the signal
        threshold is the ``peak_height_min_percent`` setting and the maximum
        of ``tic`` is passed in the maximum-signal position. Baseline
        correction is always switched off here.

        Args:
            rt: domain values, passed as the first positional argument.
            tic: signal values; must be non-empty because ``max(tic)`` is
                taken.

        Returns:
            The result of ``sp.peak_picking_first_derivative``, unchanged.
        """
        settings = self.chromatogram_settings

        return sp.peak_picking_first_derivative(
            rt,
            tic,
            settings.peak_height_max_percent,
            settings.peak_max_prominence_percent,
            max(tic),
            settings.min_peak_datapoints,
            settings.peak_derivative_threshold,
            signal_threshold=settings.peak_height_min_percent,
            correct_baseline=False)
Beispiel #4
0
    def calc_centroid(self, mass, abund, freq):
        """Pick peaks on a profile spectrum and register each accepted apex.

        Every candidate returned by ``sp.peak_picking_first_derivative`` is
        refined with ``self.find_apex_fit_quadratic`` and checked with
        ``self.check_prominence``. Candidates passing both steps are stored
        through ``self.add_mspeak``.

        Args:
            mass: m/z values of the profile (used as the picking domain).
            abund: abundance values of the profile; must be non-empty.
            freq: frequency values, forwarded to the apex fit.

        Returns:
            None. Peaks are added to the object via ``self.add_mspeak``.
        """
        settings = self.mspeaks_settings
        height_limit = settings.peak_height_max_percent
        prominence_limit = settings.peak_max_prominence_percent
        min_points = settings.min_peak_datapoints
        derivative_limit = settings.peak_derivative_threshold
        highest_abundance = max(abund)

        def peak_height_diff(hi, li):
            # Height difference of two points, as a percentage of the most
            # abundant point in the spectrum.
            return ((abund[hi] - abund[li]) / highest_abundance) * 100

        n_points = len(abund)

        # The second value from get_threshold goes in the max-signal position.
        signal_threshold, factor = self.get_threshold(abund)

        candidates = sp.peak_picking_first_derivative(
            mass,
            abund,
            height_limit,
            prominence_limit,
            factor,
            min_points,
            derivative_limit,
            signal_threshold=signal_threshold,
            correct_baseline=False,
            abun_norm=1,
            plot_res=False)

        for indexes_tuple in candidates:

            apex_index = indexes_tuple[1]

            mz_exp_centroid, freq_centr, intes_centr = self.find_apex_fit_quadratic(
                mass, abund, freq, apex_index)

            if not mz_exp_centroid:
                continue

            if not self.check_prominence(abund, apex_index, n_points,
                                         peak_height_diff):
                continue

            peak_resolving_power = self.calculate_resolving_power(
                abund, mass, apex_index)

            # Signal-to-noise uses the fitted intensity, while the stored
            # abundance is the raw value at the apex index.
            s2n = intes_centr / self.baselise_noise_std

            self.add_mspeak(self.polarity,
                            mz_exp_centroid,
                            abund[apex_index],
                            peak_resolving_power,
                            s2n,
                            indexes_tuple,
                            exp_freq=freq_centr,
                            ms_parent=self)
Beispiel #5
0
    def smooth_signal(self, signal):
        """Smooth ``signal`` with the smoothing options of the chromatogram settings.

        Args:
            signal: values to smooth.

        Returns:
            The result of ``sp.smooth_signal`` called with the configured
            window length, smoothing method, polynomial order and the list of
            implemented methods.
        """
        settings = self.chromatogram_settings

        return sp.smooth_signal(
            signal,
            settings.smooth_window,
            settings.smooth_method,
            settings.savgol_pol_order,
            settings.implemented_smooth_method)
Beispiel #6
0
def smooth_signal(signal, parameters: LCMSParameters):
    """Smooth ``signal`` using the ``lc_ms`` smoothing options of ``parameters``.

    Args:
        signal: values to smooth.
        parameters: settings object whose ``lc_ms`` section supplies the
            window length, smoothing method, polynomial order and the list
            of implemented methods.

    Returns:
        The result of ``sp.smooth_signal``.
    """
    lc_ms = parameters.lc_ms

    return sp.smooth_signal(
        signal,
        lc_ms.smooth_window,
        lc_ms.smooth_method,
        lc_ms.savgol_pol_order,
        lc_ms.implemented_smooth_method,
    )
Beispiel #7
0
    def plot_detected_baseline(self, ax=None, color="blue"):  # pragma: no cover
        """Plot the baseline detected on the total ion chromatogram.

        The baseline is computed with ``sp.baseline_detector`` from
        ``self.tic`` and ``self.retention_time`` using the height and
        prominence limits of the chromatogram settings.

        Args:
            ax: matplotlib axes to draw on; the current axes are used when
                ``None``.
            color: colour of the baseline line.

        Returns:
            The axes that were drawn on.
        """
        import matplotlib.pyplot as plt

        if ax is None:
            ax = plt.gca()

        settings = self.chromatogram_settings
        max_height = settings.peak_height_max_percent
        max_prominence = settings.peak_max_prominence_percent

        baseline = sp.baseline_detector(self.tic, self.retention_time,
                                        max_height, max_prominence)

        # Draw the detected baseline against retention time. Plotting only
        # ``self.retention_time`` would draw time against sample index and
        # throw the computed baseline away.
        # NOTE(review): assumes baseline_detector returns one value per
        # retention time - confirm against its implementation.
        ax.plot(self.retention_time, baseline, color=color)
        ax.set(xlabel='Retention Time (s)', ylabel='Total Ion Chromatogram')

        return ax
Beispiel #8
0
    def centroid_detector(self, tic, rt):
        """Build a peak detector over the TIC from the chromatogram settings.

        This function has been replaced with
        ``sp.peak_picking_first_derivative`` and is not used.

        Args:
            tic: signal values.
            rt: retention-time values.

        Returns:
            The result of ``sp.peak_detector_generator``.
        """
        settings = self.chromatogram_settings

        # noise estimation
        noise_std = settings.std_noise_threshold
        method = settings.noise_threshold_method

        # peak picking
        min_height = settings.peak_height_min_percent
        min_datapoints = settings.min_peak_datapoints

        # baseline detection
        max_prominence = settings.peak_max_prominence_percent
        max_height = settings.peak_height_max_percent

        return sp.peak_detector_generator(
            tic, noise_std, method, rt, max_height, min_height,
            max_prominence, min_datapoints)
Beispiel #9
0
    def centroid_detector(self, tic, rt):
        """Build a peak detector over the TIC from the chromatogram settings.

        Args:
            tic: signal values.
            rt: retention-time values.

        Returns:
            The result of ``sp.peak_detector_generator`` called with the
            configured noise, peak-picking and baseline thresholds.
        """
        cfg = self.chromatogram_settings

        return sp.peak_detector_generator(
            tic,
            cfg.std_noise_threshold,
            cfg.noise_threshold_method,
            rt,
            cfg.peak_height_max_percent,      # baseline detection
            cfg.peak_height_min_percent,      # peak picking
            cfg.peak_max_prominence_percent,  # baseline detection
            cfg.min_peak_datapoints)          # peak picking
Beispiel #10
0
def eic_centroid_detector(max_tic,
                          eic_data: EIC_Data,
                          parameters: LCMSParameters,
                          smooth=True):
    """Pick peaks on an EIC and store the result on ``eic_data``.

    Args:
        max_tic: not used by this function; the maximum of the (optionally
            smoothed) EIC is passed to the peak picker instead.
        eic_data: object providing ``time`` and ``eic``; its ``apexes``
            attribute is overwritten with the list of picked peaks.
        parameters: settings object whose ``lc_ms`` section supplies the
            picking thresholds.
        smooth: when true the EIC is smoothed with ``smooth_signal`` before
            picking.

    Returns:
        None. The picked peaks are written to ``eic_data.apexes``.
    """
    lc_ms = parameters.lc_ms

    eic_signal = smooth_signal(eic_data.eic, parameters) if smooth else eic_data.eic

    picked = sp.peak_picking_first_derivative(
        eic_data.time,
        eic_signal,
        lc_ms.peak_height_max_percent,
        lc_ms.peak_max_prominence_percent,
        max(eic_signal),
        lc_ms.min_peak_datapoints,
        lc_ms.peak_derivative_threshold,
        signal_threshold=lc_ms.eic_signal_threshold,
        correct_baseline=lc_ms.correct_eic_baseline,
        check_abundance=True,
        plot_res=False,
    )

    # Materialise the picker output so it can be stored and reused.
    eic_data.apexes = list(picked)
Beispiel #11
0
    def centroid_detector(self, tic, rt):
        """Build a peak detector over the TIC from the chromatogram settings.

        TODO: change the parameters to accommodate EIC peak picking; a better
        algorithm is needed to detect the start and end of a peak.

        Args:
            tic: signal values.
            rt: retention-time values.

        Returns:
            The result of ``sp.peak_detector_generator``.
        """
        options = self.chromatogram_settings

        noise_args = (options.std_noise_threshold,
                      options.noise_threshold_method)

        # Order expected by the detector after ``rt``:
        # max height, min height, max prominence, min datapoints.
        threshold_args = (options.peak_height_max_percent,
                          options.peak_height_min_percent,
                          options.peak_max_prominence_percent,
                          options.min_peak_datapoints)

        return sp.peak_detector_generator(tic, *noise_args, rt,
                                          *threshold_args)
Beispiel #12
0
    def deconvolution(self, peaks_entity_data, plot_res):
        """Resolve the features inside every TIC peak and store them as GC peaks.

        The processed TIC is peak-picked first; each picked TIC peak defines
        a retention-time window (the deconvolution window). Features of
        ``peaks_entity_data`` whose apex lies strictly inside the window are
        filtered, grouped with ``self.hc`` when more than one survives,
        merged, re-picked and handed to ``self.add_gcpeak``.

        Args:
            peaks_entity_data: mapping ``apex_rt -> feature``. Each feature
                maps a retention time to a dict with the keys ``'mz'``,
                ``'abundance'`` and ``'scan_number'`` and also carries a
                ``'ref_apex_rt'`` entry.
            plot_res: forwarded to ``self.add_gcpeak``; when truthy the
                processed TIC is also plotted once every peak is handled.
                The caller's value is honoured rather than being forced to
                ``True``.

        Returns:
            None. ``self.processed_appexes`` is reset to an empty list and
            ``self.add_gcpeak`` is called for every apex found.
        """
        settings = self.chromatogram_settings
        max_height = settings.peak_height_max_percent
        max_prominence = settings.peak_max_prominence_percent
        min_peak_datapoints = settings.min_peak_datapoints
        signal_threshold = settings.peak_height_min_percent
        max_rt_distance = settings.max_rt_distance

        signal = self._processed_tic
        max_signal = max(signal)

        tic_peaks = sp.peak_picking_first_derivative(
            self.retention_time, signal, max_height, max_prominence,
            max_signal, min_peak_datapoints,
            signal_threshold=signal_threshold, correct_baseline=False,
            plot_res=False)

        # The deconvolution window is defined by the TIC peak region.
        all_apexes_rt = np.array(list(peaks_entity_data.keys()))

        # Workaround for peak picking missing some local minima.
        self.processed_appexes = []

        for indexes_tuple in tic_peaks:

            start_rt = self.retention_time[indexes_tuple[0]]
            final_rt = self.retention_time[indexes_tuple[2]]

            # All features whose apex lies strictly inside the TIC peak window.
            inside_window = (all_apexes_rt > start_rt) & (all_apexes_rt < final_rt)
            peak_features_rts = all_apexes_rt[inside_window]

            filtered_features_rt = []
            filtered_features_abundance = []

            for each_apex_rt in peak_features_rts:

                apex_data = peaks_entity_data.get(each_apex_rt).get(each_apex_rt)
                peak_features_tic = sum(apex_data.get('abundance'))
                norm_smooth_tic = (peak_features_tic / max_signal) * 100

                # TODO: improve peak filtering.
                #   Calculate peak sharpness here and filter on it
                #   (Amax - An / n)?
                #   Fit the peak and calculate its Gaussian similarity?
                # Currently a flat % TIC relative-abundance threshold is used
                # and the apex spectrum must hold more than one m/z.
                if norm_smooth_tic > signal_threshold and len(apex_data['mz']) > 1:
                    filtered_features_rt.append(each_apex_rt)
                    filtered_features_abundance.append(peak_features_tic)

            if not filtered_features_rt:
                # No feature survived the filter for this TIC peak.
                continue

            if len(filtered_features_rt) == 1:
                # Only one peak feature inside the deconvolution window.
                datadict = peaks_entity_data.get(filtered_features_rt[0])
                self._pick_and_add_gcpeaks(datadict, start_rt, final_rt,
                                           max_signal, plot_res)
                continue

            # More than one peak feature identified inside a TIC peak: each
            # group returned by the hierarchical clustering is one feature.
            grouped_rt = self.hc(filtered_features_rt,
                                 filtered_features_abundance,
                                 max_rt_distance=max_rt_distance)

            for apex_rt_list in grouped_rt.values():
                group_datadict = self._merge_group_datadict(peaks_entity_data,
                                                            apex_rt_list)
                self._pick_and_add_gcpeaks(group_datadict, start_rt, final_rt,
                                           max_signal, plot_res)

        if plot_res:
            plt.plot(self.retention_time, self._processed_tic, c='black')
            plt.show()

    def _merge_group_datadict(self, peaks_entity_data, apex_rt_list):
        """Merge the features listed in ``apex_rt_list`` into one feature dict.

        Abundances of identical m/z values at the same retention time are
        summed; every ``'ref_apex_rt'`` value is collected into a list.
        """
        group_datadict = {'ref_apex_rt': []}

        for each_group_apex_rt in apex_rt_list:

            datadict = peaks_entity_data.get(each_group_apex_rt)

            for rt, each_datadict in datadict.items():

                if rt == "ref_apex_rt":
                    group_datadict['ref_apex_rt'].append(each_datadict)
                    continue

                if rt not in group_datadict:
                    group_datadict[rt] = each_datadict
                    continue

                # Retention time already present in the group: add the
                # abundances gathered so far onto the incoming spectrum.
                gathered = group_datadict[rt]
                each_mz_abun = dict(zip(each_datadict.get("mz"),
                                        each_datadict.get("abundance")))

                for mz, abundance in zip(gathered.get("mz"),
                                         gathered.get("abundance")):
                    if mz in each_mz_abun:
                        each_mz_abun[mz] = each_mz_abun[mz] + abundance
                    else:
                        each_mz_abun[mz] = abundance

                group_datadict[rt] = {
                    'mz': list(each_mz_abun.keys()),
                    'abundance': list(each_mz_abun.values()),
                    'scan_number': each_datadict.get('scan_number'),
                }

        return group_datadict

    def _pick_and_add_gcpeaks(self, datadict, start_rt, final_rt, max_signal, plot_res):
        """Re-pick the summed-abundance trace of ``datadict`` and add its apexes.

        The trace is the per-retention-time sum of abundances, sorted by
        retention time and smoothed with ``self.smooth_signal``. Every apex
        picked on it is passed to ``self.add_gcpeak``.
        """
        settings = self.chromatogram_settings

        trace = sorted(
            (rt, sum(each_datadict["abundance"]))
            for rt, each_datadict in datadict.items()
            if rt != "ref_apex_rt")

        peak_rt, peak_tic = zip(*trace)

        smoothed_tic = self.smooth_signal(peak_tic)

        # Each picked item is a (start, apex, final) index tuple.
        picked = list(sp.peak_picking_first_derivative(
            peak_rt, smoothed_tic,
            settings.peak_height_max_percent,
            settings.peak_max_prominence_percent,
            max_signal,
            settings.min_peak_datapoints,
            signal_threshold=settings.peak_height_min_percent,
            correct_baseline=False, plot_res=False))

        for new_apex_index in picked:
            self.add_gcpeak(new_apex_index, start_rt, final_rt, peak_rt,
                            smoothed_tic, datadict, plot_res)
Beispiel #13
0
    def find_peaks_entity(self, eic_dict):
        """Combine the EIC peaks that share an apex retention time.

        Every EIC with at least ``min_peak_datapoints`` points is smoothed
        with ``self.smooth_tic`` and peak-picked. The points of each picked
        peak (from its first index up to, but not including, its last index)
        are stored under the retention time of the peak apex.

        Args:
            eic_dict: mapping ``mz -> sequence`` whose item 0 is the EIC
                signal and item 1 the matching retention times.

        Returns:
            dict mapping ``apex_rt`` to a dict that holds, per retention
            time, ``{'mz': [...], 'abundance': [...], 'scan_number': [...]}``
            plus a ``'ref_apex_rt'`` entry with the interpolated apex time
            of the first peak stored under that apex.
        """
        settings = self.chromatogram_settings
        max_prominence = settings.peak_max_prominence_percent
        max_height = settings.peak_height_max_percent
        signal_threshold = settings.eic_signal_threshold
        min_peak_datapoints = settings.min_peak_datapoints

        # Highest intensity over all EICs, shared by every picking call.
        max_eic = 0
        for eic_scan_index_rt in eic_dict.values():
            highest = max(eic_scan_index_rt[0])
            if highest > max_eic:
                max_eic = highest

        peaks_entity_data = {}

        for mz, eic_scan_index_rt in eic_dict.items():

            eic = eic_scan_index_rt[0]
            rt_list = eic_scan_index_rt[1]

            if len(eic) < min_peak_datapoints:
                continue

            smooth_eic = self.smooth_tic(eic)

            picked = sp.peak_picking_first_derivative(
                rt_list, smooth_eic, max_height, max_prominence, max_eic,
                min_peak_datapoints,
                signal_threshold=signal_threshold, correct_baseline=False)

            for initial_scan, apex_scan, final_scan in picked:

                rt_correction = self.quadratic_interpolation(
                    rt_list, smooth_eic, apex_scan)
                ref_apex_rt = round(rt_list[apex_scan] + rt_correction, 4)
                apex_rt = rt_list[apex_scan]

                for scan_index in range(initial_scan, final_scan):

                    peak_rt = rt_list[scan_index]
                    peak_abundance = smooth_eic[scan_index]

                    if apex_rt not in peaks_entity_data:
                        # First point seen for this apex: create the entity.
                        peaks_entity_data[apex_rt] = {
                            peak_rt: {'mz': [mz],
                                      'abundance': [peak_abundance],
                                      'scan_number': [scan_index]},
                            "ref_apex_rt": ref_apex_rt,
                        }
                        continue

                    entity = peaks_entity_data[apex_rt]

                    if peak_rt not in entity:
                        # Known apex, new retention time.
                        entity[peak_rt] = {'mz': [mz],
                                           'abundance': [peak_abundance],
                                           'scan_number': [scan_index]}
                        continue

                    # Known apex and retention time: extend the stored lists.
                    stored = entity[peak_rt]
                    stored['mz'].append(mz)
                    stored['abundance'].append(peak_abundance)
                    stored['scan_number'].append(scan_index)

        return peaks_entity_data