예제 #1
0
class RangeTree(Generic[V]):
    """A specialized tree dealing with ranges.

    Every interval is stored in an AVL tree under a single integer anchor:

    * ``[s:e]`` -> anchor ``s``, stored end ``e`` (``e`` itself is excluded)
    * ``[s:]``  -> anchor ``s``, stored end ``InfinityMarker.INF_PLUS``
    * ``[:e]``  -> anchor ``e - 1``, stored end ``InfinityMarker.INF_MINUS``

    The tree therefore maps ``anchor -> (value, end)``.
    """

    def __init__(self) -> None:
        self._tree = FastAVLTree(
        )  # Map ints to tuples (val, Union[end, InfinityMarker])

    def __setitem__(self, key: Union[slice, range], value: V) -> None:
        """Set a value to the given interval.

        Only slices and ranges with the default step (1) are supported.

        If the slice or range is inverted (end < start), the interval will be
        flipped.

        Half-open slices ([:1], [1:]) are supported; the fully unbounded
        slice ([:]) is not.

        Raises:
            ValueError: if ``key`` is not a slice/range, uses a custom step,
                or is unbounded on both sides.
            KeyError: if the interval overlaps one that is already stored.
        """
        if isinstance(key, (slice, range)):
            if key.step is not None and key.step != 1:
                m = 'Intervals with custom steps ({}) not' \
                    ' supported.'.format(key)
                raise ValueError(m)
        else:
            raise ValueError('Only slices and ranges supported.')
        s, e = key.start, key.stop
        if s is None and e is None:
            # Without this guard the anchor computation below would fail
            # with an opaque TypeError (None - 1).
            raise ValueError('Fully unbounded intervals ([:]) are not'
                             ' supported.')
        if s is not None and e is not None and s > e:
            s, e = e, s

        # The check for an empty space is a little complex.
        # First check the lower bound: the closest interval anchored at or
        # below ours must end before our start.
        anchor = s if s is not None else e - 1
        try:
            lower_item = self._tree.floor_item(anchor)
        except KeyError:
            lower_item = None
        if lower_item is not None:
            lower_end = lower_item[1][1]
            # A left-open new interval collides with anything anchored at or
            # below it; a right-open neighbour below always collides with us.
            if (s is None or lower_end is InfinityMarker.INF_PLUS
                    or (lower_end is not InfinityMarker.INF_MINUS
                        and lower_end > s)):
                raise KeyError('Overlapping intervals.')

        # Now the higher bound: the closest interval anchored at or above
        # ours must start at or after our end.
        try:
            higher_item = self._tree.ceiling_item(anchor)
        except KeyError:
            higher_item = None
        if higher_item is not None:
            higher_start, (_, higher_end) = higher_item
            # A right-open new interval collides with anything anchored at or
            # above it; a left-open neighbour above always collides with us.
            if (e is None or higher_end is InfinityMarker.INF_MINUS
                    or higher_start < e):
                raise KeyError('Overlapping intervals')

        if e is None:
            e = InfinityMarker.INF_PLUS
        elif s is None:
            e = InfinityMarker.INF_MINUS

        self._tree[anchor] = (value, e)

    def _lookup(self, key):
        """Return ``(True, value)`` if ``key`` is covered, else ``(False, None)``.

        Shared by ``__getitem__``, ``get`` and ``__contains__`` so that the
        three agree on what "covered" means.
        """
        try:
            floor = self._tree.floor_item(key)
        except KeyError:
            # No anchor at or below key: only a left-open interval anchored
            # above the key can still cover it.
            try:
                ceiling = self._tree.ceiling_item(key)
            except KeyError:
                return False, None
            val, end = ceiling[1]
            if end is InfinityMarker.INF_MINUS:
                return True, val
            return False, None

        start, (val, end) = floor
        if end is InfinityMarker.INF_PLUS:
            covered = True
        elif end is InfinityMarker.INF_MINUS:
            # A left-open interval is anchored on its last covered point, so
            # as a floor item it covers the key only on an exact match.
            covered = start == key
        else:
            covered = key < end
        if covered:
            return True, val
        return False, None

    def __getitem__(self, key: int) -> V:
        """Return the value of the interval covering ``key``.

        Raises:
            KeyError: if no stored interval covers ``key``.
        """
        found, val = self._lookup(key)
        if not found:
            raise KeyError(key)
        return val

    def get(self, key: int, default: D = None) -> Union[V, D]:
        """Return the value of the interval covering ``key``, else ``default``."""
        found, val = self._lookup(key)
        return val if found else default

    def __contains__(self, key: int) -> bool:
        """Return True if some stored interval covers ``key``."""
        found, _ = self._lookup(key)
        return found
예제 #2
0
파일: roi.py 프로젝트: rusterx/peakonly
def get_ROIs(path, delta_mz=0.005, required_points=15, dropped_points=3, progress_callback=None):
    '''
    Detect regions of interest (ROIs) in the MS1 scans of an mzML file.

    Scans are processed in order. Every non-zero peak either extends the
    in-progress ROI whose mean m/z is closest to it (when closer than
    ``delta_mz``) or starts a new ROI. An ROI that has not been extended for
    more than ``dropped_points`` consecutive scans is closed, and kept only if
    it collected at least ``required_points`` points.

    :param path: path to mzml file
    :param delta_mz: a peak joins an ROI only if |mz - ROI mean mz| < delta_mz
    :param required_points: minimal number of points a finished ROI must have
    :param dropped_points: number of consecutive scans an ROI may miss before
        it is closed (can be zero); each returned ROI is also padded with this
        many zero-intensity points at both ends
    :param progress_callback: optional object with an ``emit(int)`` method
        (e.g. a Qt signal); receives the progress in percent
    :return: ROIs - a list of ROI objects found in current file
    '''
    # read all MS1 scans in mzML file
    run = pymzml.run.Reader(path)
    scans = [scan for scan in run if scan.ms_level == 1]
    if not scans:
        # no MS1 data at all: nothing to detect
        return []

    ROIs = []  # completed ROIs
    process_ROIs = FastAVLTree()  # ROIs still being extended, keyed by mz

    # Bounds of the keys currently stored in process_ROIs. (inf, -inf) encodes
    # an empty tree, so that neither neighbour lookup below is attempted.
    min_mz, max_mz = float('inf'), float('-inf')

    # seed the tree with the first scan
    number = 1  # number of processed scan
    init_scan = scans[0]
    start_time = init_scan.scan_time[0]
    for mz, i in zip(init_scan.mz, init_scan.i):
        if i != 0:
            process_ROIs[mz] = ProcessROI([1, 1],
                                          [start_time, start_time],
                                          [i],
                                          [mz],
                                          mz)
            min_mz = min(min_mz, mz)
            max_mz = max(max_mz, mz)

    for scan in tqdm(scans):
        if number == 1:  # already processed scan
            number += 1
            continue
        # expand ROI
        for mz, intensity in zip(scan.mz, scan.i):
            if intensity == 0:
                continue
            # The bounds guarantee that a neighbour exists, so the tree
            # lookups cannot raise. Comparisons are inclusive so that a peak
            # equal to an existing key finds that ROI instead of replacing it.
            ceiling_item, floor_item = None, None
            if mz <= max_mz:
                _, ceiling_item = process_ROIs.ceiling_item(mz)
            if mz >= min_mz:
                _, floor_item = process_ROIs.floor_item(mz)

            # choose closest (by mean mz of the ROI, ties go to the ceiling)
            if ceiling_item is None:
                closest_item = floor_item
            elif floor_item is None:
                closest_item = ceiling_item
            elif ceiling_item.mzmean - mz > mz - floor_item.mzmean:
                closest_item = floor_item
            else:
                closest_item = ceiling_item

            if closest_item is not None and abs(closest_item.mzmean - mz) < delta_mz:
                roi = closest_item
                roi.mzmean = (roi.mzmean * roi.points + mz) / (roi.points + 1)
                roi.points += 1
                if roi.scan[1] == number:
                    # ROIs is already extended (two peaks in one mz window):
                    # merge into the last point, intensity-weighting the mz
                    roi.mz[-1] = (roi.i[-1]*roi.mz[-1] + intensity*mz) / (roi.i[-1] + intensity)
                    roi.i[-1] = (roi.i[-1] + intensity)
                else:
                    roi.mz.append(mz)
                    roi.i.append(intensity)
                    roi.scan[1] = number  # show that we extended the roi
                    roi.rt[1] = scan.scan_time[0]
            else:
                # no ROI close enough: start a new one
                time = scan.scan_time[0]
                process_ROIs[mz] = ProcessROI([number, number],
                                              [time, time],
                                              [intensity],
                                              [mz],
                                              mz)
                # keep the bounds in sync so later peaks of this scan see it
                min_mz = min(min_mz, mz)
                max_mz = max(max_mz, mz)

        # Check and cleanup
        to_delete = []
        for mz, roi in process_ROIs.items():
            if roi.scan[1] < number <= roi.scan[1] + dropped_points:
                # insert 'zero' in the end
                roi.mz.append(roi.mzmean)
                roi.i.append(0)
            elif roi.scan[1] != number:
                # missed too many scans: close it, keep it if long enough
                to_delete.append(mz)
                if roi.points >= required_points:
                    ROIs.append(ROI(
                        roi.scan,
                        roi.rt,
                        roi.i,
                        roi.mz,
                        roi.mzmean
                    ))
        process_ROIs.remove_items(to_delete)
        try:
            min_mz, _ = process_ROIs.min_item()
            max_mz, _ = process_ROIs.max_item()
        except ValueError:
            # tree is empty
            min_mz, max_mz = float('inf'), float('-inf')
        number += 1
        if progress_callback is not None and not number % 10:
            # number is already one past the processed scan, so clamp to 100
            progress_callback.emit(min(100, int(number * 100 / len(scans))))

    # add final rois
    for mz, roi in process_ROIs.items():
        if roi.points >= required_points:
            # top up the trailing zeros to exactly dropped_points
            for _ in range(dropped_points - (number - 1 - roi.scan[1])):
                # insert 'zero' in the end
                roi.mz.append(roi.mzmean)
                roi.i.append(0)
            ROIs.append(ROI(
                roi.scan,
                roi.rt,
                roi.i,
                roi.mz,
                roi.mzmean
            ))
    # expand constructed roi
    for roi in ROIs:
        for _ in range(dropped_points):
            # insert in the begin
            roi.i.insert(0, 0)
            roi.mz.insert(0, roi.mzmean)
        # change scan numbers (necessary for future matching)
        roi.scan = (roi.scan[0] - dropped_points, roi.scan[1] + dropped_points)
        assert roi.scan[1] - roi.scan[0] == len(roi.i) - 1
    return ROIs