Beispiel #1
0
def test_get_insertion_overlap_positions():
    """Compare get_insertion_overlap_positions with hand-written intervals.

    Every expected result is given as (lower, upper) pairs, converted into a
    pyinter.IntervalSet of open intervals, and compared for equality with the
    first element of the tuple returned by the function under test.
    """

    def expected_set(pairs):
        # Turn (lower, upper) pairs into an IntervalSet of open intervals.
        expected = pyinter.IntervalSet()
        for lower, upper in pairs:
            expected.add(pyinter.open(lower, upper))
        return expected

    read_length = 50
    min_mappable = 20

    # --- Several paths over one shared block list -------------------------
    # Trailing comments give the path node indices written next to each
    # block in the original annotations.
    blocks = [
        GenomeInterval(1, 0, 100),  # 0, 1
        GenomeInterval(1, 100, 200),  # 2, 3
        GenomeInterval(1, 210, 300),  # 4, 5
        GenomeInterval(1, 350, 360),  # 6, 7
        GenomeInterval(1, 370, 400),  # 8, 9
        GenomeInterval(1, 0, 100, True),  # 10, 11
        GenomeInterval(1, 0, 10, True),  # 12, 13
    ]
    paths = (
        list(range(10)),
        [0, 1, 10, 11, 2, 3],
        [0, 1, 2, 3, 10, 11, 2, 3],
        [0, 1, 2, 3, 12, 13, 2, 3],
        [0, 1, 2, 3, 4, 5, 10, 11, 6, 7],
        [0, 1, 2, 3, 4, 5, 12, 13, 6, 7],
    )
    expected_pairs = [
        (),
        ((80, 170), ),
        ((185, 275), ),
        (),
        ((305, 395), ),
        (),
    ]

    for path, pairs in zip(paths, expected_pairs):
        out, _, _ = get_insertion_overlap_positions(path, blocks,
                                                    read_length, min_mappable)
        inter = expected_set(pairs)
        print('truth: {0}\nresult: {1}\n'.format(inter, out))
        assert out == inter

    # --- A single path that visits two flagged (fourth argument True) blocks
    blocks = [
        GenomeInterval(1, 0, 100),
        GenomeInterval(1, 200, 300),
        GenomeInterval(0, 350, 400),
        GenomeInterval(1, 0, 50, True),
        GenomeInterval(1, 0, 50, True),
    ]
    path = [0, 1, 6, 7, 2, 3, 8, 9, 4, 5]
    out, _, _ = get_insertion_overlap_positions(path, blocks,
                                                read_length, min_mappable)
    inter = expected_set([(130, 170), (355, 395)])
    print('truth: {0}\nresult: {1}\n'.format(inter, out))
    assert out == inter
Beispiel #2
0
def get_gap_overlap_positions(path, blocks, read_len, min_mappable=20):
    """Return an IntervalSet of open intervals derived from gap segments.

    The segments returned by ``genome_blocks_gaps(blocks, path)`` are laid
    end to end on a single coordinate axis starting at 0.  From that layout
    the function computes, using ``m = min_mappable``:

    * window starts ``i`` for which ``[i, i + m)`` lies inside the
      non-insertion segments (gaps included);
    * the subset of those starts for which ``[i, i + m)`` also overlaps a
      non-insertion, non-gap segment;
    * both sets widened at their lower end by ``read_len - m``, and the
      second subtracted from the first.

    Args:
        path: path passed through to ``genome_blocks_gaps``.
        blocks: blocks passed through to ``genome_blocks_gaps``.
        read_len: read length used to widen the window-start intervals.
        min_mappable: window length ``m`` (default 20).

    Returns:
        pyinter.IntervalSet containing only open intervals.
    """
    m = min_mappable

    # Lay the segments out back to back and record which stretches of the
    # axis are non-insertion (gaps included) and which are additionally
    # not gaps.
    non_insertion = pyinter.IntervalSet()
    non_gap = pyinter.IntervalSet()
    offset = 0
    for segment in genome_blocks_gaps(blocks, path):
        seg_len = len(segment)
        if seg_len == 0:
            continue
        seg_end = offset + seg_len
        if not segment.is_insertion():
            non_insertion.add(pyinter.closedopen(offset, seg_end))
            if not segment.is_gap:
                non_gap.add(pyinter.closedopen(offset, seg_end))
        offset = seg_end

    # Starts i with [i, i+m) contained in the non-insertion stretches.
    contained_starts = pyinter.IntervalSet()
    for stretch in non_insertion:
        last_start = stretch.upper_value - m
        if stretch.lower_value <= last_start:
            contained_starts.add(
                pyinter.closed(stretch.lower_value, last_start))

    # Starts i with [i, i+m) overlapping a non-gap stretch.
    touching_starts = pyinter.IntervalSet()
    for stretch in non_gap:
        touching_starts.add(
            pyinter.closed(stretch.lower_value - m + 1,
                           stretch.upper_value - 1))

    contained_and_touching = contained_starts.intersection(touching_starts)

    # Widen both sets at the lower end by (read_len - m).
    widened_contained = pyinter.IntervalSet()
    for stretch in contained_starts:
        widened_contained.add(
            pyinter.closed(stretch.lower_value - read_len + m,
                           stretch.upper_value))
    widened_touching = pyinter.IntervalSet()
    for stretch in contained_and_touching:
        widened_touching.add(
            pyinter.closed(stretch.lower_value - read_len + m,
                           stretch.upper_value))

    remaining = widened_contained.difference(widened_touching)

    # Re-express every piece as an open interval: an endpoint that belongs
    # to the piece is pushed outwards by one.  Pieces for which the open
    # form would satisfy upper - lower <= 1 are dropped.
    cleaned = pyinter.IntervalSet()
    for piece in remaining:
        low, high = piece.lower_value, piece.upper_value
        if low in piece:
            low -= 1
        if high in piece:
            high += 1
        if low < high - 1:
            cleaned.add(pyinter.open(low, high))
    return cleaned
Beispiel #3
0
def get_insertion_overlap_positions(path, blocks, read_len, min_mappable=20):
    """Compute invalid read-start intervals around insertion segments.

    The segments returned by ``genome_blocks_gaps(blocks, path)`` are laid
    end to end on one coordinate axis starting at 0.

    Args:
        path: path passed through to ``genome_blocks_gaps``.
        blocks: blocks passed through to ``genome_blocks_gaps``.
        read_len: read length ``R``.
        min_mappable: margin ``m`` (default 20).

    Returns:
        A 3-tuple ``(invalid_read_start, overlapping_d, overlapping_t)``:
        an IntervalSet of open intervals, followed by two lists of booleans
        with one entry per interval (in the set's iteration order) telling
        whether that interval overlaps an interval recorded for a de novo
        segment or for a translocation segment, respectively.
    """
    m = min_mappable
    R = read_len

    de_novo_starts = pyinter.IntervalSet()
    translocation_starts = pyinter.IntervalSet()
    window_starts = pyinter.IntervalSet()

    offset = 0
    for segment in genome_blocks_gaps(blocks, path):
        seg_len = len(segment)
        long_enough = 0 < seg_len - R + 2 * m

        # A segment is recorded in at most one of the two per-kind sets,
        # with de novo taking precedence over translocation.
        if segment.is_de_novo and long_enough:
            per_kind = de_novo_starts
        elif segment.is_translocation and long_enough:
            per_kind = translocation_starts
        else:
            per_kind = None
        if per_kind is not None:
            per_kind.add(pyinter.open(offset - m, offset + seg_len - R + m))

        if segment.is_insertion():
            window_starts.add(pyinter.open(offset - m, offset + seg_len))
        offset += seg_len

    # The windows are collected first and shortened afterwards; the original
    # author notes this detour is required to merge intervals properly.
    invalid_read_start = pyinter.IntervalSet()
    for window in window_starts:
        low = window.lower_value
        high = window.upper_value - (R - m)
        if low < high:
            invalid_read_start.add(pyinter.open(low, high))

    overlapping_d = []
    overlapping_t = []
    for interval in invalid_read_start:
        overlapping_d.append(
            any(d.overlaps(interval) for d in de_novo_starts))
        overlapping_t.append(
            any(t.overlaps(interval) for t in translocation_starts))
    return invalid_read_start, overlapping_d, overlapping_t
Beispiel #4
0
    def find_potential_solutions(self, range_begin: date,
                                 range_end: date) -> Set[GroupAvailability]:
        """Collect group availabilities accepted by is_potential_solution.

        The availability intervals of every entry in
        ``personinclasstemplate_set`` and ``resourceinclasstemplate_set`` are
        turned into start/end events.  The events are processed in timestamp
        order; after each one, the intersection of all currently open
        intervals is wrapped in a GroupAvailability and kept when
        ``self.is_potential_solution`` accepts it.

        Args:
            range_begin: first argument forwarded to ``get_availability``.
            range_end: second argument forwarded to ``get_availability``.

        Returns:
            The set of accepted GroupAvailability objects.
        """
        EiCT = EntityInClassTemplate

        class Event(object):
            # One endpoint of an availability interval, tagged with its owner.
            def __init__(self, timestamp: TimeStamp, interval: Interval,
                         islower: bool, eict: EiCT):
                self.timestamp = timestamp
                self.interval = interval
                self.islower = islower
                self.eict = eict

        # Gather the involved entities: persons first, then resources.
        participants = list(
            self.personinclasstemplate_set.all())  # type: List[EiCT]
        participants.extend(self.resourceinclasstemplate_set.all())

        # Emit one "opens" and one "closes" event per availability interval.
        # NOTE(review): every entry, including those coming from
        # resourceinclasstemplate_set, is read through `.person` - confirm
        # that resource templates expose that attribute.
        timeline = []  # type: List[Event]
        for participant in participants:
            availability = participant.person.get_availability(
                range_begin, range_end)  # type: IntervalSet
            for ival in availability:
                timeline.append(
                    Event(ival.lower_value, ival, True, participant))
                timeline.append(
                    Event(ival.upper_value, ival, False, participant))
        timeline.sort(key=lambda evt: evt.timestamp)

        # Sweep over the events, tracking which intervals are currently open.
        accepted = set()  # type: Set[GroupAvailability]
        active = set()
        for evt in timeline:
            member = (evt.eict, evt.interval)
            if evt.islower:
                active.add(member)
            else:
                active.remove(member)

            # Intersect all open intervals, starting from an unbounded one.
            # `open` is called with two bounds here, so it is expected to be
            # an interval constructor in scope rather than the builtin.
            shared_span = open(NEGATIVE_INFINITY,
                               INFINITY)  # type: Interval
            present = []
            for eict, ival in active:
                shared_span = shared_span.intersect(ival)
                present.append(eict)

            candidate = GroupAvailability(present, shared_span)
            if self.is_potential_solution(candidate):
                accepted.add(candidate)

        return accepted
Beispiel #5
0
def test_get_gap_overlap_positions():
    """Compare get_gap_overlap_positions with hand-written intervals.

    Every expected result is given as (lower, upper) pairs, converted into a
    pyinter.IntervalSet of open intervals, and compared for equality with the
    value returned by the function under test (default min_mappable).
    """

    def expected_set(pairs):
        # Turn (lower, upper) pairs into an IntervalSet of open intervals.
        expected = pyinter.IntervalSet()
        for lower, upper in pairs:
            expected.add(pyinter.open(lower, upper))
        return expected

    read_length = 50

    # --- Two orderings of the same blocks ---------------------------------
    blocks = [
        GenomeInterval(1, 0, 100),
        GenomeInterval(1, 100, 200),
        GenomeInterval(1, 249, 300),
        GenomeInterval(1, 350, 400),
        GenomeInterval(1, 500, 600),
    ]
    paths = (
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        [0, 1, 2, 3, 4, 5, 7, 6, 8, 9],
    )
    expected_pairs = (
        [(299, 301), (399, 451)],
        [(299, 326), (424, 451)],
    )

    for path, pairs in zip(paths, expected_pairs):
        out = get_gap_overlap_positions(path, blocks, read_length)
        inter = expected_set(pairs)
        print('truth: {0}\nresult: {1}\n'.format(inter, out))
        assert out == inter

    # --- A path that also visits two flagged (fourth argument True) blocks
    blocks = [
        GenomeInterval(1, 0, 100),
        GenomeInterval(1, 200, 300),
        GenomeInterval(0, 350, 400),
        GenomeInterval(1, 0, 50, True),
        GenomeInterval(1, 0, 50, True),
    ]
    path = [0, 1, 6, 7, 2, 3, 8, 9, 4, 5]
    out = get_gap_overlap_positions(path, blocks, read_length)
    inter = expected_set([(99, 131), (169, 201), (349, 356), (394, 401)])
    print('truth: {0}\nresult: {1}\n'.format(inter, out))
    assert out == inter