Example #1
0
    def test_minimal(self):
        test_matr = np.zeros([20, 100])
        test_matr[0, 10] = 1
        lines = _identify_ridge_lines(test_matr, 2*np.ones(20), 1)
        assert_(len(lines) == 1)

        test_matr = np.zeros([20, 100])
        test_matr[0:2, 10] = 1
        lines = _identify_ridge_lines(test_matr, 2*np.ones(20), 1)
        assert_(len(lines) == 1)
Example #2
0
 def test_single_pass(self):
     """A single generated ridge line is recovered unchanged."""
     col_steps = [0, 1, 2, 5]
     row_gaps = [0, 1, 2, 0, 1]
     n_points = 12
     # Background is a tiny positive constant rather than exact zeros.
     matr = np.zeros([20, 50]) + 1e-12
     expected = _gen_ridge_line([0, 25], matr.shape, n_points,
                                col_steps, row_gaps)
     matr[expected[0], expected[1]] = 1
     # Limits are chosen so that every generated step and gap is allowed.
     dist_limits = max(col_steps) * np.ones(20)
     gap_limit = max(row_gaps) + 1
     found = _identify_ridge_lines(matr, dist_limits, gap_limit)
     assert_array_equal(found, [expected])
Example #3
0
    def test_single_bigdist(self):
        """A column jump above the distance limit splits the ridge in two."""
        col_steps = [0, 1, 2, 5]
        row_gaps = [0, 1, 2, 4]
        n_points = 12
        max_dist = 3
        matr = np.zeros([20, 50])
        ridge = _gen_ridge_line([0, 25], matr.shape, n_points,
                                col_steps, row_gaps)
        matr[ridge[0], ridge[1]] = 1
        # This should get 2 lines, since the distance is too large
        found = _identify_ridge_lines(matr, max_dist * np.ones(20),
                                      max(row_gaps) + 1)
        assert_(len(found) == 2)

        gap_bound = max(row_gaps) + 0.1
        for found_line in found:
            # Column steps inside each identified line stay below max_dist.
            col_jumps = np.abs(np.diff(found_line[1]))
            np.testing.assert_array_less(col_jumps, max_dist)

            # Row steps inside each identified line stay within the gaps.
            row_jumps = np.abs(np.diff(found_line[0]))
            np.testing.assert_array_less(row_jumps, gap_bound)
Example #4
0
    def test_single_biggaps(self):
        """Row gaps above ``max_gap`` split the ridge into three lines."""
        max_gap = 1
        max_dist = 1
        col_steps = [0]
        row_gaps = [3, 6]
        n_points = 30
        matr = np.zeros([50, 50])
        ridge = _gen_ridge_line([0, 25], matr.shape, n_points,
                                col_steps, row_gaps)
        matr[ridge[0], ridge[1]] = 1
        # This should get 3 lines, since the gaps are too large
        found = _identify_ridge_lines(matr, max_dist * np.ones(50), max_gap)
        assert_(len(found) == 3)

        gap_bound = max(row_gaps) + 0.1
        for found_line in found:
            # Column steps inside each identified line stay below max_dist.
            col_jumps = np.abs(np.diff(found_line[1]))
            np.testing.assert_array_less(col_jumps, max_dist)

            # Row steps inside each identified line stay within the gaps.
            row_jumps = np.abs(np.diff(found_line[0]))
            np.testing.assert_array_less(row_jumps, gap_bound)
Example #5
0
def find_peaks(data, widths=(1, 2, 7, 30, 182, 365)):
    '''
    Finds the peaks using the CWTFindPeaks algorithm. This code is mostly a line
    by line port of the scipy.signal.wavelets.find_peaks_cwt. We had to port that
    code since the default scipy implementation does not return the widths.

    Parameters
    ----------
    data : array like
        the time series to find the peaks
    widths : array like
        the candidate widths to test

    Returns
    -------
    A list of triples (peak_volume, peak_width, peak_position), sorted in
    descending order. The volume is the value of data[peak_position]. The
    width is the largest candidate width reached by the ridge line on which
    the peak was found.
    '''
    data = np.asanyarray(data)
    widths = np.asanyarray(widths)

    # These are default values from the scipy port which we based our code on.
    gap_thresh = np.ceil(widths[0])
    max_distances = widths / 4.0

    cwt_dat = cwt(data, ricker, widths)
    ridge_lines = _identify_ridge_lines(cwt_dat, max_distances, gap_thresh)
    # noise_perc=1 filters more noise.
    filtered = _filter_ridge_lines(cwt_dat, ridge_lines,
                                   min_snr=1, noise_perc=1)

    # Each filtered ridge is of the form [[width_indices], [peak_positions]].
    candidates = []
    for ridge in filtered:
        width_idx, positions = ridge[0], ridge[1]
        # Internal sanity check: ridge rows must index into `widths`.
        assert width_idx.min() >= 0
        assert width_idx.max() < widths.shape[0]

        peak_pos = positions[0]
        peak_width = widths[width_idx.max()]
        candidates.append((data[peak_pos], peak_width, peak_pos))

    return sorted(candidates, reverse=True)
Example #6
0
def wavelet_peak_find(s, min_snr=1., assume_sig=4., min_length=8.0,
                      max_dist=4.0, gap_thresh=2.0):
    """
    Locate peaks in a trace via ridge lines of its continuous wavelet
    transform.

    `s` must expose `.values` (the signal) and `.index` (the matching
    abscissa). The transform is evaluated on 200 widths spaced linearly
    from 1 to 100. `widths / max_dist` and `gap_thresh` are handed to the
    ridge-line identification, and `cwtm.shape[0] / min_length` and
    `min_snr` to the ridge-line filter.

    Returns a list with one dict per surviving ridge, holding the keys
    't0', 't1', 'x', 'h' and 'w'. 'x' is the abscissa of the strongest point
    on the ridge, 'w' the width derived from the scale at that point, 'h'
    the coefficient there divided by the square root of the scale, and
    't0'/'t1' are 'x' minus/plus `assume_sig` times 'w'.
    """
    # Imported lazily so that this module can still be loaded
    # under scipy 0.9.0.
    import scipy.signal._peak_finding as spf

    y, t = s.values, s.index

    widths = np.linspace(1, 100, 200)
    cwtm = spf.cwt(y, spf.ricker, widths)
    ridges = spf._identify_ridge_lines(cwtm, widths / max_dist, gap_thresh)
    filt_ridges = spf._filter_ridge_lines(
        cwtm, ridges, min_length=cwtm.shape[0] / min_length, min_snr=min_snr)

    ### the next code is just to visualize how this works
    #import matplotlib.pyplot as plt
    #ctr_x, ctr_y = np.meshgrid(t, widths)
    #ctr_y *= (t[1] - t[0])
    #plt.contourf(ctr_x, ctr_y, cwtm)
    ##plt.imshow(cwtm) #, extent=(widths[0], widths[-1], times[0], times[-1]))
    #for l in ridges:
    #    plt.plot(t[l[1]], l[0] * 0.5 * (t[1] - t[0]), 'k-')
    #for l in filt_ridges:
    #    plt.plot(t[l[1]], l[0] * 0.5 * (t[1] - t[0]), 'r-')
    ##plt.plot(peaks_t, peaks_w, 'k*')  # not working
    #plt.show()

    # For every ridge, take its point of maximum intensity and record
    # that point's characteristics.
    peak_list = []
    for ridge in filt_ridges:
        rows, cols = ridge[0], ridge[1]
        strongest = np.argmax(cwtm[rows, cols])
        row, col = rows[strongest], cols[strongest]

        peak_w = widths[row] * 0.5 * (t[1] - t[0])
        peak_amp = cwtm[row, col] / (widths[row] ** 0.5)
        peak_t = t[col]
        half_span = assume_sig * peak_w

        peak_list.append({
            't0': peak_t - half_span,
            't1': peak_t + half_span,
            'x': peak_t,
            'h': peak_amp,
            'w': peak_w,
        })
    return peak_list
Example #7
0
 def test_empty(self):
     test_matr = np.zeros([20, 100])
     lines = _identify_ridge_lines(test_matr, 2*np.ones(20), 1)
     assert_(len(lines) == 0)
Example #8
0
 def test_empty(self):
     test_matr = np.zeros([20, 100])
     lines = _identify_ridge_lines(test_matr, 2 * np.ones(20), 1)
     assert_(len(lines) == 0)
Example #9
0
def detect_peaks(x: np.ndarray, y: np.ndarray, widths: np.ndarray,
                 min_length: int = 5, max_distance: int = 2,
                 gap_threshold: int = 1, snr: float = 3, min_width: float = 5,
                 max_width: float = 60,
                 estimators: Union[str, _estimator_type] = "default"):
    r"""
    Find peaks in a 1D signal.

    Peaks are detected using a modified version of the algorithm described in
    [DP06]_.

    Parameters
    ----------
    x : sorted array
    y : array of intensities
    widths : array
        Array of widths, in x units. Used as scales to build the wavelet
        array.
    min_length : int
        Minimum number of points in a ridge line.
    max_distance : float
        Maximum x distance between consecutive points in a ridge line, in x
        units.
    gap_threshold : int
        Maximum number of consecutive missing peaks in a ridge line.
    snr : positive number
        Signal-to-noise ratio used to filter peaks. Defined as follows:

        .. math::

            SNR = \frac{peak height - baseline}{noise}

    min_width : positive number
        Minimum width of the peaks
    max_width : positive number
        Maximum width of the peaks
    estimators : str or dict
        How to estimate baseline, noise, peak height, peak width, peak area and
        peak location. If `estimators` is 'cwt', parameters are computed as
        described in [DP06]_. Check the Notes to see how estimations in
        'default' mode are computed or how custom estimators can be used.

    Returns
    -------
    peaks : List of PeakLocation
        Sorted by peak location.
    params : list of peak parameters
        In the same order as `peaks`.

    Notes
    -----
    Peaks are detected using the CWT algorithm described in [DP06]_. The optimum
    scale where each peak is detected is the local maximum at the lowest scale
    in the ridge line. If no local maximum was found, the scale with the maximum
    coefficient is chosen. After finding a peak, the extension of the peak
    is found by finding the nearest local minimum at both sides of the peak,
    using the wavelet coefficients with the best scale. A peak is represented
    then by three indices specifying the peak location, peak start and peak end.
    These three values, together with baseline and noise estimations are used
    to estimate peak parameters. If the mode used is 'default', the peak
    parameters are defined as follows:

        baseline :
            A baseline is built using y values where no peak was detected. These
            values are interpolated to build the baseline.
        noise :
            The noise is computed as the standard deviation of the values used
            to build the baseline. To obtain a robust estimation, the median
            absolute deviation of the baseline is used.
        height :
            The height of a peak is computed as the difference between the
            y value and the baseline value at the peak location
        snr :
            The quotient between the height of the peak and the noise.
        area :
            Area of the peak obtained by integration between the start and
            the end of the peak. The area of the baseline is subtracted.
        width :
            The peak width is computed as the peak extension, that is, the
            difference between the end and the start of the peak.

    After computing these parameters, peaks are filtered based on SNR and peak
    width. Peak overlap between the filtered peaks is analyzed then. Two
    peaks are overlapping if there is superposition in their peak extensions.
    Overlapping peaks are flagged, their extension corrected and corrected peak
    parameters are computed again.

    Custom estimators can be used for noise, baseline, peak height, peak
    location, peak width and peak area:

    .. code-block:: python

            estimators = {"baseline": baseline_func, "noise": noise_func,
                          "height": height_func,  "loc": loc_func,
                          "width": width_func, "area": area_func}

            # x and y are the same array used in the function
            # peaks is a list of PeakLocation instances
            # peak is a single PeakLocation instance

            # baseline must have the same size as x and y
            baseline = baseline_func(x, y, peaks)
            # noise is a positive number
            noise = noise_func(x, y, peaks)
            # peak_parameters are all positive numbers
            # (area and height can be zero)
            height = height_func(x, y, peak, baseline)
            area = area_func(x, y, peak, baseline)
            width = width_func(x, y, peak, baseline)
            loc = loc_func(x, y, peak, baseline)

    References
    ----------

    .. [DP06] Pan Du, Warren A. Kibbe, Simon M. Lin, Improved peak detection in
        mass spectrum by incorporating continuous wavelet transform-based
        pattern matching, Bioinformatics, Volume 22, Issue 17, 1 September 2006,
        Pages 2059–2065, https://doi.org/10.1093/bioinformatics/btl355

    """

    # Convert to uniform sampling
    xu, yu = _resample_data(x, y)

    # convert parameters to number of points
    # NOTE(review): max_distance is indexed as max_distance[0] further down, so
    # it is presumably an array after this conversion - confirm against
    # _convert_to_points.
    widths, max_distance = \
        _convert_to_points(xu, widths, max_distance)

    # detect peaks in the ridge lines
    w = cwt(yu, ricker, widths)
    ridge_lines = \
        _peak_finding._identify_ridge_lines(w, max_distance, gap_threshold)
    # y_peaks are the local maxima of y and are used to validate peaks
    # y_peaks = find_peaks(yu)[0]
    y_peaks = argrelmax(yu, order=2)[0]
    peaks = _process_ridge_lines(w, y_peaks, ridge_lines, min_length,
                                 max_distance)

    # baseline and noise estimation
    # In "cwt" mode nothing is estimated here and None is forwarded for both;
    # any other non-"default" value is treated as a mapping of callables.
    if estimators == "default":
        baseline, noise = baseline_noise_estimation(yu)
    elif estimators == "cwt":
        baseline, noise = None, None
    else:
        baseline = estimators["baseline"](xu, yu, peaks)
        noise = estimators["noise"](xu, yu, peaks)

    # peak filtering and parameter estimation
    peaks, params = \
        _estimate_params(xu, yu, widths, w, peaks, snr, min_width, max_width,
                         estimators, baseline=baseline, noise=noise)

    # sort peaks based on location (params is reordered with the same
    # permutation so that both lists stay aligned)
    sorted_index = sorted(range(len(peaks)), key=lambda s: peaks[s].loc)
    peaks = [peaks[k] for k in sorted_index]
    params = [params[k] for k in sorted_index]

    # find and correct overlap between consecutive peaks:
    # overlap_index collects positions whose parameters must be recomputed,
    # rm_index collects positions to drop from the final result. Both may
    # contain repeated entries.
    overlap_index = list()
    rm_index = list()
    for k in range(len(peaks) - 1):
        left, right = peaks[k], peaks[k + 1]
        is_same_peak = right.loc == left.loc
        merge = (right.loc - left.loc) <= max_distance[0]
        has_overlap = left.end > right.start
        if is_same_peak:
            # The comparison adds 0 or 1 to k: the peak with the larger scale
            # is dropped, and on equal scales the left one is dropped.
            rm_index.append(k + (left.scale < right.scale))
        elif merge:
            # Fold the left peak into the right one: right inherits left's
            # start, its location moves to the integer midpoint, and left is
            # dropped.
            rm_index.append(k)
            right.start = left.start
            right.loc = (left.loc + right.loc) // 2
        elif has_overlap:
            _fix_peak_extension(left, right, yu)
            overlap_index.extend([k, k + 1])
        # remove invalid peaks after the extension was fixed
        # This check runs for every pair, whichever branch was taken above,
        # and only inspects the left peak.
        # NOTE(review): the last peak is never the `left` of a pair, so it is
        # not checked here - confirm that this is intended.
        if yu[left.loc] < max(yu[left.start], yu[left.end]):
            rm_index.append(k)

    overlap_peaks = [peaks[x] for x in overlap_index]

    # if there are peaks with overlap, then compute again peak parameters after
    # correction
    if overlap_index:
        # append_empty_params=True is passed so that overlap_params can be
        # zipped against overlap_index; empty entries are handled below.
        _, overlap_params = \
            _estimate_params(xu, yu, widths, w, overlap_peaks, snr, min_width,
                             max_width, estimators, baseline=baseline,
                             noise=noise, append_empty_params=True)
        # replace corrected values in params:
        for k, param in zip(overlap_index, overlap_params):
            if len(param):
                params[k] = param
            else:
                # An empty entry marks the corrected peak for removal.
                rm_index.append(k)

    # remove invalid peaks and back scale peaks
    # peaks are filtered by rm_index only, while params additionally drops
    # empty entries.
    peaks = [p.rescale(xu, x) for (k, p) in enumerate(peaks)
             if k not in rm_index]
    params = [p for k, p in enumerate(params) if (len(p) and k not in rm_index)]

    return peaks, params