def test_minimal(self):
    """Check that the smallest non-empty inputs give exactly one ridge line.

    Two matrices are tried in turn: one with a single non-zero entry at
    row 0, column 10, and one where rows 0 and 1 of column 10 are both set.
    """
    for marked_rows in (0, slice(0, 2)):
        matrix = np.zeros([20, 100])
        matrix[marked_rows, 10] = 1
        found = _identify_ridge_lines(matrix, 2*np.ones(20), 1)
        assert_(len(found) == 1)
def test_single_pass(self):
    """Check that one generated ridge line is recovered unchanged.

    A line produced by `_gen_ridge_line` is written into an almost-zero
    matrix; with the distance limit set to the largest generated distance
    and the gap limit set to the largest generated gap plus one,
    `_identify_ridge_lines` is expected to return exactly that line.
    """
    step_sizes = [0, 1, 2, 5]
    gap_sizes = [0, 1, 2, 0, 1]
    n_points = 12

    # Background of 1e-12 everywhere (instead of exact zeros).
    matrix = np.full((20, 50), 1e-12)
    expected = _gen_ridge_line([0, 25], matrix.shape, n_points,
                               step_sizes, gap_sizes)
    matrix[expected[0], expected[1]] = 1

    dist_limits = max(step_sizes)*np.ones(20)
    gap_limit = max(gap_sizes) + 1
    found = _identify_ridge_lines(matrix, dist_limits, gap_limit)
    assert_array_equal(found, [expected])
def test_single_bigdist(self):
    """Check that a too-large column jump splits one line into two.

    The generated line uses distances up to 5 while the allowed distance
    is 3, so two ridge lines are expected. Within each returned line the
    column steps must stay below the distance limit and the row steps
    must stay below the largest generated gap plus 0.1.
    """
    step_sizes = [0, 1, 2, 5]
    gap_sizes = [0, 1, 2, 4]
    n_points = 12
    max_dist = 3

    matrix = np.zeros([20, 50])
    generated = _gen_ridge_line([0, 25], matrix.shape, n_points,
                                step_sizes, gap_sizes)
    matrix[generated[0], generated[1]] = 1

    # This should get 2 lines, since the distance is too large
    found = _identify_ridge_lines(matrix, max_dist*np.ones(20),
                                  max(gap_sizes) + 1)
    assert_(len(found) == 2)

    for ridge in found:
        col_steps = np.abs(np.diff(ridge[1]))
        np.testing.assert_array_less(col_steps, max_dist)
        row_steps = np.abs(np.diff(ridge[0]))
        np.testing.assert_array_less(row_steps, max(gap_sizes) + 0.1)
def test_single_biggaps(self):
    """Check that too-large row gaps split one line into three.

    The generated line uses gaps of 3 and 6 while the allowed gap is 1,
    so three ridge lines are expected. Within each returned line the
    column steps must stay below the distance limit and the row steps
    must stay below the largest generated gap plus 0.1.
    """
    step_sizes = [0]
    gap_sizes = [3, 6]
    max_gap = 1
    max_dist = 1
    n_points = 30

    matrix = np.zeros([50, 50])
    generated = _gen_ridge_line([0, 25], matrix.shape, n_points,
                                step_sizes, gap_sizes)
    matrix[generated[0], generated[1]] = 1

    # This should get 3 lines, since the gaps are too large
    found = _identify_ridge_lines(matrix, max_dist*np.ones(50), max_gap)
    assert_(len(found) == 3)

    for ridge in found:
        col_steps = np.abs(np.diff(ridge[1]))
        np.testing.assert_array_less(col_steps, max_dist)
        row_steps = np.abs(np.diff(ridge[0]))
        np.testing.assert_array_less(row_steps, max(gap_sizes) + 0.1)
def find_peaks(data, widths=(1, 2, 7, 30, 182, 365)):
    '''
    Finds the peaks using the CWTFindPeaks algorithm.

    This code is mostly a line by line port of
    scipy.signal.wavelets.find_peaks_cwt. We had to port that code since
    the default scipy implementation does not return the widths.

    Parameters
    ----------
    data : array like
        the time series to find the peaks
    widths : array like
        the candidate widths to test. The default is an immutable tuple so
        that it cannot be modified between calls.

    Returns
    -------
    A list of triples (peak_volume, peak_width, peak_position), sorted in
    descending order (largest volume first). The volume is the value of
    data[peak_position]. The width is the estimated width of the wavelet
    used to find that peak.
    '''
    data = np.asanyarray(data)
    widths = np.asanyarray(widths)

    # These are default values from the scipy port which we based our code on.
    gap_thresh = np.ceil(widths[0])
    max_distances = widths / 4.0

    cwt_dat = cwt(data, ricker, widths)
    ridge_lines = _identify_ridge_lines(cwt_dat, max_distances, gap_thresh)
    # noise_perc=1 filters more noise.
    filtered = _filter_ridge_lines(cwt_dat, ridge_lines,
                                   min_snr=1, noise_perc=1)

    # Filtered will be of the form [[peak_widths], [peak_positions]]
    candidates = []
    for x in filtered:
        # Sanity checks on the helper's output: the first entry must hold
        # valid indices into `widths`.
        assert x[0].min() >= 0
        assert x[0].max() < widths.shape[0]

        # Position is the first column entry of the ridge line; the width
        # is the one at the largest width index the ridge line reaches.
        peak_pos, peak_width = x[1][0], widths[x[0].max()]
        candidates.append((data[peak_pos], peak_width, peak_pos))

    return sorted(candidates, reverse=True)
def wavelet_peak_find(s, min_snr=1., assume_sig=4., min_length=8.0,
                      max_dist=4.0, gap_thresh=2.0):
    """Find peaks in a series using ridge lines of its wavelet transform.

    The values of `s` are transformed with a Ricker wavelet at 200 widths
    evenly spaced between 1 and 100. Ridge lines are identified and
    filtered with SciPy's private peak-finding helpers, and for every
    surviving ridge the point of maximum transform value is reported.

    Parameters
    ----------
    s : object with ``.values`` and ``.index`` (e.g. a pandas Series)
        The signal. The spacing ``index[1] - index[0]`` is used to scale
        the reported peak widths.
    min_snr : float
        Passed to ``_filter_ridge_lines`` as ``min_snr``.
    assume_sig : float
        Half-extent of a reported peak, in multiples of its width.
    min_length : float
        Ridges are filtered with a minimum length of
        ``number_of_widths / min_length``.
    max_dist : float
        ``widths / max_dist`` is passed to ``_identify_ridge_lines`` as the
        per-row maximum distance.
    gap_thresh : float
        Passed to ``_identify_ridge_lines`` as the gap threshold.

    Returns
    -------
    list of dict
        One dict per ridge with keys ``'t0'`` and ``'t1'`` (``x`` minus/plus
        ``assume_sig * w``), ``'x'`` (index value at the ridge maximum),
        ``'h'`` (transform value divided by the square root of the width)
        and ``'w'`` (width times half the index spacing).
    """
    # this import is here to let scipy 0.9.0 at least
    # load this module
    import scipy.signal._peak_finding as spf

    # Older SciPy re-exports ``cwt``/``ricker`` from this private module;
    # releases where the public wavelet functions were deprecated/removed
    # expose ``_cwt``/``_ricker`` there instead. Accept either spelling.
    cwt_func = getattr(spf, 'cwt', None) or spf._cwt
    ricker_func = getattr(spf, 'ricker', None) or spf._ricker

    y, t = s.values, s.index
    widths = np.linspace(1, 100, 200)
    cwtm = cwt_func(y, ricker_func, widths)
    ridges = spf._identify_ridge_lines(cwtm, widths / max_dist, gap_thresh)
    filt_ridges = spf._filter_ridge_lines(
        cwtm, ridges,
        min_length=cwtm.shape[0] / min_length, min_snr=min_snr)

    # loop through the ridges and find the point of maximum
    # intensity on the ridge and save its characteristics
    peak_list = []
    for ridge in filt_ridges:
        rows, cols = ridge[0], ridge[1]
        # Position along the ridge where the transform is largest.
        pl = np.argmax(cwtm[rows, cols])
        width = widths[rows[pl]]
        peak_w = width * 0.5 * (t[1] - t[0])
        peak_amp = cwtm[rows[pl], cols[pl]] / (width ** 0.5)
        peak_t = t[cols[pl]]
        t0, t1 = peak_t - assume_sig * peak_w, peak_t + assume_sig * peak_w
        peak_list.append({'t0': t0, 't1': t1, 'x': peak_t,
                          'h': peak_amp, 'w': peak_w})
    return peak_list
def test_empty(self):
    """Check that an all-zero matrix yields no ridge lines."""
    blank = np.zeros((20, 100))
    dist_limits = 2*np.ones(20)
    found = _identify_ridge_lines(blank, dist_limits, 1)
    assert_(len(found) == 0)
def test_empty(self):
    """Check that no ridge lines are reported for a matrix of zeros."""
    n_rows, n_cols = 20, 100
    zeros_matrix = np.zeros([n_rows, n_cols])
    ridge_lines = _identify_ridge_lines(zeros_matrix, 2 * np.ones(n_rows), 1)
    assert_(len(ridge_lines) == 0)
def detect_peaks(x: np.ndarray, y: np.ndarray, widths: np.ndarray,
                 min_length: int = 5, max_distance: int = 2,
                 gap_threshold: int = 1, snr: float = 3, min_width: float = 5,
                 max_width: float = 60,
                 estimators: Union[str, _estimator_type] = "default"):
    r"""
    Find peaks in a 1D signal.

    Peaks are detected using a modified version of the algorithm described in
    [DP06]_.

    Parameters
    ----------
    x : sorted array
    y : array of intensities
    widths : array
        Array of widths, in x units. Used as scales to build the wavelet
        array.
    min_length : int
        Minimum number of points in a ridge line.
    max_distance : float
        Maximum x distance between consecutive points in a ridge line, in x
        units.
    gap_threshold : int
        Maximum number of consecutive missing peaks in a ridge line.
    snr : positive number
        Signal-to-noise ratio used to filter peaks. Defined as follows:

        .. math::

            SNR = \frac{peak height - baseline}{noise}

    min_width : positive number
        Minimum width of the peaks
    max_width : positive number
        Maximum width of the peaks
    estimators : str or dict
        How to estimate baseline, noise, peak height, peak width, peak area
        and peak location. If `estimators` is 'cwt', parameters are computed
        as described in [DP06]_. Check the Notes to see how estimations in
        'default' mode are computed or how custom estimators can be used.

    Returns
    -------
    peaks : List of PeakLocation
        Sorted by peak location.
    params : list of peak parameters
        One entry per returned peak, in the same order as `peaks`
        (presumably a dict per peak; see `_estimate_params`).

    Notes
    -----
    Peaks are detected using the CWT algorithm described in [DP06]_. The
    optimum scale where each peak is detected is the local maximum at the
    lowest scale in the ridge line. If no local maximum was found, the scale
    with the maximum coefficient is chosen. After finding a peak, the
    extension of the peak is found by finding the nearest local minimum at
    both sides of the peak, using the wavelet coefficients with the best
    scale. A peak is represented then by three indices specifying the peak
    location, peak start and peak end. These three values, together with
    baseline and noise estimations are used to estimate peak parameters. If
    the mode used is 'default', the peak parameters are defined as follows:

        baseline :
            A baseline is built using y values where no peak was detected.
            These values are interpolated to build the baseline.
        noise :
            The noise is computed as the standard deviation of the values
            used to build the baseline. To obtain a robust estimation, the
            median absolute deviation of the baseline is used.
        height :
            The height of a peak is computed as the difference between the
            y value and the baseline value at the peak location.
        snr :
            The quotient between the height of the peak and the noise.
        area :
            Area of the peak obtained by integration between the start and
            the end of the peak. The area of the baseline is subtracted.
        width :
            The peak width is computed as the peak extension, that is, the
            difference between the end and the start of the peak.

    After computing these parameters, peaks are filtered based on SNR and
    peak width. Peak overlap between the filtered peaks is analyzed then.
    Two peaks are overlapping if there is superposition in their peak
    extensions. Overlapping peaks are flagged, their extension corrected and
    corrected peak parameters are computed again.

    Custom estimators can be used for noise, baseline, peak height,
    peak location, peak width and peak area:

    .. code-block:: python

        estimators = {"baseline": baseline_func, "noise": noise_func,
                      "height": height_func,  "loc": loc_func,
                      "width": width_func, "area": area_func}

        # x and y are the same array used in the function
        # peaks is a list of PeakLocation instances
        # peak is a single PeakLocation instance

        # baseline must have the same size as x and y
        baseline = baseline_func(x, y, peaks)
        # noise is a positive number
        noise = noise_func(x, y, peaks)
        # peak_parameters are all positive numbers
        # (area and height can be zero)
        height = height_func(x, y, peak, baseline)
        area = area_func(x, y, peak, baseline)
        width = width_func(x, y, peak, baseline)
        loc = loc_func(x, y, peak, baseline)

    References
    ----------
    .. [DP06] Pan Du, Warren A. Kibbe, Simon M. Lin, Improved peak detection
        in mass spectrum by incorporating continuous wavelet transform-based
        pattern matching, Bioinformatics, Volume 22, Issue 17, 1 September
        2006, Pages 2059–2065, https://doi.org/10.1093/bioinformatics/btl355

    """
    # Convert to uniform sampling
    xu, yu = _resample_data(x, y)

    # convert parameters to number of points
    widths, max_distance = \
        _convert_to_points(xu, widths, max_distance)

    # detect peaks in the ridge lines
    w = cwt(yu, ricker, widths)
    ridge_lines = \
        _peak_finding._identify_ridge_lines(w, max_distance, gap_threshold)
    # y_peaks are the local maxima of y and are used to validate peaks
    # y_peaks = find_peaks(yu)[0]
    y_peaks = argrelmax(yu, order=2)[0]
    peaks = _process_ridge_lines(w, y_peaks, ridge_lines, min_length,
                                 max_distance)

    # baseline and noise estimation
    if estimators == "default":
        baseline, noise = baseline_noise_estimation(yu)
    elif estimators == "cwt":
        # no baseline/noise arrays are built in 'cwt' mode
        baseline, noise = None, None
    else:
        # custom estimators: a mapping providing "baseline" and "noise"
        # callables
        baseline = estimators["baseline"](xu, yu, peaks)
        noise = estimators["noise"](xu, yu, peaks)

    # peak filtering and parameter estimation
    peaks, params = \
        _estimate_params(xu, yu, widths, w, peaks, snr, min_width,
                         max_width, estimators, baseline=baseline,
                         noise=noise)

    # sort peaks based on location
    sorted_index = sorted(range(len(peaks)), key=lambda s: peaks[s].loc)
    peaks = [peaks[k] for k in sorted_index]
    params = [params[k] for k in sorted_index]

    # find and correct overlap between consecutive peaks:
    # overlap_index collects indices whose parameters must be recomputed,
    # rm_index collects indices of peaks to discard at the end.
    overlap_index = list()
    rm_index = list()
    for k in range(len(peaks) - 1):
        left, right = peaks[k], peaks[k + 1]
        is_same_peak = right.loc == left.loc
        # merge threshold is the first element of the converted max_distance
        merge = (right.loc - left.loc) <= max_distance[0]
        has_overlap = left.end > right.start
        if is_same_peak:
            # the boolean adds 0 or 1: drop the right peak (k + 1) when its
            # scale is larger than the left one, otherwise drop the left (k)
            rm_index.append(k + (left.scale < right.scale))
        elif merge:
            # drop the left peak and let the right one absorb it: it takes
            # the left start and the (floor) midpoint of both locations.
            # The mutated right peak is the `left` of the next iteration.
            rm_index.append(k)
            right.start = left.start
            right.loc = (left.loc + right.loc) // 2
        elif has_overlap:
            _fix_peak_extension(left, right, yu)
            overlap_index.extend([k, k + 1])

            # remove invalid peaks after the extension was fixed
            if yu[left.loc] < max(yu[left.start], yu[left.end]):
                rm_index.append(k)

    overlap_peaks = [peaks[x] for x in overlap_index]

    # if there are peaks with overlap, then compute again peak parameters after
    # correction
    if overlap_index:
        # append_empty_params=True is relied on below: overlap_params is
        # zipped one-to-one with overlap_index, and an empty entry marks a
        # peak to be removed
        _, overlap_params = \
            _estimate_params(xu, yu, widths, w, overlap_peaks, snr, min_width,
                             max_width, estimators, baseline=baseline,
                             noise=noise, append_empty_params=True)
        # replace corrected values in params:
        for k, param in zip(overlap_index, overlap_params):
            if len(param):
                params[k] = param
            else:
                rm_index.append(k)

    # remove invalid peaks and back scale peaks
    peaks = [p.rescale(xu, x) for (k, p) in enumerate(peaks)
             if k not in rm_index]
    params = [p for k, p in enumerate(params)
              if (len(p) and k not in rm_index)]
    return peaks, params