Esempio n. 1
0
 def test_project_into_range(self):
     """Check RQ.projectIntoRange against known per-position counts.

     Eleven intervals are projected into two windows, (1, 6) and
     (20, 26); each result is compared element-wise with the expected
     array and must match at every position.
     """
     tStart = array([1, 1, 1, 1, 1, 2, 2, 2, 2, 10, 20])
     tEnd = array([2, 3, 4, 5, 6, 3, 4, 5, 6, 15, 25])

     # Window (1, 6): overlaps the nine intervals that start at 1 or 2.
     observed = RQ.projectIntoRange(tStart, tEnd, 1, 6)
     matches = observed == array([5, 8, 6, 4, 2])
     assert_equal(True, all(matches))

     # Window (20, 26): only the (20, 25) interval overlaps it.
     observed = RQ.projectIntoRange(tStart, tEnd, 20, 26)
     matches = observed == array([1, 1, 1, 1, 1, 0])
     assert_equal(True, all(matches))
Esempio n. 2
0
def coverageInWindow(refWin, hits):
    """
    Compute read coverage over a reference window.

    Given:
     `refWin`: a (winId, winStart, winEnd) tuple
     `hits`: iterable of objects exposing `referenceName`,
       `referenceStart` and `referenceEnd`

    Only hits whose `referenceName` equals `winId` are considered.
    Their start/end coordinates are passed to `projectIntoRange`
    together with the window bounds, and its result is returned.

    If no hit lies on the window's reference, an all-zero array of
    length `winEnd - winStart` is returned instead.
    """
    winId, winStart, winEnd = refWin
    a = np.array([(hit.referenceStart, hit.referenceEnd) for hit in hits
                  if hit.referenceName == winId])
    if len(a) == 0:
        # np.array([]) is one-dimensional, so the column slicing below
        # would raise IndexError; report zero coverage instead.
        return np.zeros(winEnd - winStart, dtype=np.uint)
    tStart = a[:, 0]
    tEnd = a[:, 1]
    cov = projectIntoRange(tStart, tEnd, winStart, winEnd)
    return cov
Esempio n. 3
0
def coverageInWindow(refWin, hits):
    """
    Return the coverage contributed by `hits` inside `refWin`.

    Given:
     `refWin`: a (winId, winStart, winEnd) tuple
     `hits`: iterable of objects exposing `referenceName`,
       `referenceStart` and `referenceEnd`

    Hits on a reference other than `winId` are ignored.  The remaining
    start/end coordinates are handed to `projectIntoRange` along with
    the window bounds, and whatever it returns is the result.

    When no hit is on `winId`, the result is an all-zero array of
    length `winEnd - winStart`.
    """
    winId, winStart, winEnd = refWin
    a = np.array([(hit.referenceStart, hit.referenceEnd)
                  for hit in hits
                  if hit.referenceName == winId])
    if len(a) == 0:
        # Without any matching hit `a` is a 1-D empty array and
        # a[:,0] would fail with IndexError, so short-circuit here.
        return np.zeros(winEnd - winStart, dtype=np.uint)
    tStart = a[:,0]
    tEnd   = a[:,1]
    cov = projectIntoRange(tStart, tEnd, winStart, winEnd)
    return cov
Esempio n. 4
0
def coverageInWindow(refWin, hits):
    """
    Compute read coverage over a reference window.

    Given:
     `refWin`: a (winId, winStart, winEnd) tuple
     `hits`: iterable of objects exposing `referenceName`,
       `referenceStart` and `referenceEnd`

    Hits whose `referenceName` differs from `winId` are dropped.  With
    no hits left, an all-zero unsigned array of length
    `winEnd - winStart` is returned; otherwise the result of
    `projectIntoRange` on the hits' start/end columns is returned.
    """
    winId, winStart, winEnd = refWin
    spans = np.array([(h.referenceStart, h.referenceEnd) for h in hits
                      if h.referenceName == winId])

    # Guard clause: nothing aligned to this reference.
    if len(spans) == 0:
        return np.zeros(winEnd - winStart, dtype=np.uint)

    return projectIntoRange(spans[:, 0], spans[:, 1], winStart, winEnd)
Esempio n. 5
0
def coverageInWindow(refWin, hits):
    """
    Return the coverage contributed by `hits` inside `refWin`.

    Given:
     `refWin`: a (winId, winStart, winEnd) tuple
     `hits`: iterable of objects exposing `referenceName`,
       `referenceStart` and `referenceEnd`

    Only hits on reference `winId` count.  If there are any, their
    start and end coordinates are passed to `projectIntoRange` with
    the window bounds; if there are none, an all-zero unsigned array
    of length `winEnd - winStart` is returned.
    """
    winId, winStart, winEnd = refWin
    onRef = np.array([(hit.referenceStart, hit.referenceEnd)
                      for hit in hits
                      if hit.referenceName == winId])
    if len(onRef) != 0:
        starts, ends = onRef[:,0], onRef[:,1]
        return projectIntoRange(starts, ends, winStart, winEnd)
    return np.zeros(winEnd - winStart, dtype=np.uint)
Esempio n. 6
0
def kSpannedIntervals(refWindow, k, start, end, minLength=0):
    """
    Locate the stretches of a window that are spanned by >= k reads.

    Given:
     `refWindow`: a (winId, winStart, winEnd) tuple
     `k`: minimum number of reads that must span a returned interval
       (asserted to be >= 1)
     `start`, `end`: numpy arrays of read start/end coordinates, each
       read covering [start, end).  `start` must be sorted ascending.
     `minLength`: intervals shorter than this are dropped from the
       result

    Returns a list of (start, end) tuples in window order, expressed
    in the original (untranslated) coordinates.

    The search is greedy: it repeatedly jumps to the next k-covered
    position and extends as far as k reads allow.  It is therefore not
    guaranteed to be optimal in every case, though it is in the most
    common ones.
    """
    assert k >= 1
    _, winStart_, winEnd_ = refWindow

    # Clamp the reads to the window, then shift so the window starts at 0.
    start = np.clip(start, winStart_, winEnd_) - winStart_
    end = np.clip(end, winStart_, winEnd_) - winStart_
    winStart = 0
    winEnd = winEnd_ - winStart_

    positions = np.arange(winEnd - winStart, dtype=int)
    coverage = projectIntoRange(start, end, winStart, winEnd)

    intervalsFound = []
    y = 0
    while y < winEnd:
        # Step 1: x becomes the first k-covered position at or after y.
        candidates = np.flatnonzero((positions >= y) & (coverage >= k))
        if len(candidates) == 0:
            break
        x = candidates[0]

        # Step 2: y becomes the k-th largest `end` among the reads that
        # start at or before x, i.e. how far k reads still reach.
        reach = np.sort(end[start <= x])
        if len(reach) < k:
            break
        y = reach[-k]

        intervalsFound.append((x, y))

    # Shift back to reference coordinates, keeping long-enough intervals.
    return [(lo + winStart_, hi + winStart_)
            for (lo, hi) in intervalsFound
            if hi - lo >= minLength]
Esempio n. 7
0
def kSpannedIntervals(refWindow, k, start, end, minLength=0):
    """
    Find the intervals of a window that at least k reads span.

    Given:
     `refWindow`: a (winId, winStart, winEnd) tuple
     `k`: how many reads must span an interval for it to be reported
       (asserted to be >= 1)
     `start`, `end`: numpy arrays with the start and end coordinate of
       each read; a read occupies [start, end).  `start` must be in
       ascending order.
     `minLength`: minimum length an interval needs to be reported

    The result is a list of (start, end) tuples, in ascending order
    and in the caller's coordinate system.

    This is a greedy procedure, so the answer is not always optimal,
    but it is in the most common cases.
    """
    assert k >= 1
    _, winStart_, winEnd_ = refWindow

    # Work in window-relative coordinates: clamp reads to the window
    # bounds and subtract the window start.
    start = np.clip(start, winStart_, winEnd_) - winStart_
    end = np.clip(end, winStart_, winEnd_) - winStart_
    winStart = 0
    winEnd = winEnd_ - winStart_

    positions = np.arange(winEnd - winStart, dtype=int)
    coverage = projectIntoRange(start, end, winStart, winEnd)

    y = 0
    intervalsFound = []
    while y < winEnd:
        # Step 1: first position >= y with coverage of at least k.
        kCovered = np.flatnonzero((positions >= y) & (coverage >= k))
        if len(kCovered) == 0:
            break
        x = kCovered[0]

        # Step 2: push y out to the k-th largest `end` among the reads
        # starting at or before x.
        sortedEnds = np.sort(end[start <= x])
        if len(sortedEnds) < k:
            break
        y = sortedEnds[-k]

        intervalsFound.append((x, y))

    # Undo the translation and drop intervals shorter than minLength.
    kept = []
    for s, e in intervalsFound:
        if e - s >= minLength:
            kept.append((s + winStart_, e + winStart_))
    return kept
 def test_project_into_range(self):
     """Check RQ.projectIntoRange on two windows with known results."""
     tStart = array([1,1,1,1,1,2,2,2,2,10,20])
     tEnd   = array([2,3,4,5,6,3,4,5,6,15,25])

     # (window start, window end, expected value at each position)
     cases = (
         (1, 6, [5, 8, 6, 4, 2]),
         (20, 26, [1, 1, 1, 1, 1, 0]),
     )
     for winStart, winEnd, expected in cases:
         projected = RQ.projectIntoRange(tStart, tEnd, winStart, winEnd)
         assert_equal(True, all(projected == array(expected)))