def test_get_insertion_overlap_positions():
    """Compare get_insertion_overlap_positions with hand-written intervals.

    Only the first element of the returned triple (the interval set) is
    checked; the two overlap flag lists are ignored.
    """
    read_len = 50
    min_mappable = 20

    def check(path, blocks, expected_pairs):
        # Run the function under test first, then build the expected set of
        # open intervals and compare the two.
        observed, _, _ = get_insertion_overlap_positions(
            path, blocks, read_len, min_mappable)
        expected = pyinter.IntervalSet()
        for pair in expected_pairs:
            expected.add(pyinter.open(pair[0], pair[1]))
        print('truth: {0}\nresult: {1}\n'.format(expected, observed))
        assert (observed == expected)

    # Scenario 1: several paths over one shared block list.  The trailing
    # comments give the pair of path node indices belonging to each block.
    blocks = [
        GenomeInterval(1, 0, 100),          # nodes 0, 1
        GenomeInterval(1, 100, 200),        # nodes 2, 3
        GenomeInterval(1, 210, 300),        # nodes 4, 5
        GenomeInterval(1, 350, 360),        # nodes 6, 7
        GenomeInterval(1, 370, 400),        # nodes 8, 9
        GenomeInterval(1, 0, 100, True),    # nodes 10, 11
        GenomeInterval(1, 0, 10, True),     # nodes 12, 13
    ]
    paths = (
        list(range(10)),
        [0, 1, 10, 11, 2, 3],
        [0, 1, 2, 3, 10, 11, 2, 3],
        [0, 1, 2, 3, 12, 13, 2, 3],
        [0, 1, 2, 3, 4, 5, 10, 11, 6, 7],
        [0, 1, 2, 3, 4, 5, 12, 13, 6, 7],
    )
    truth = [
        tuple(),
        ((80, 170), ),
        ((185, 275), ),
        tuple(),
        ((305, 395), ),
        tuple(),
    ]
    for path, expected_pairs in zip(paths, truth):
        check(path, blocks, expected_pairs)

    # Scenario 2: a single path that visits nodes 6-7 and 8-9 (the two
    # blocks built with a fourth argument of True) between the other blocks.
    blocks = [
        GenomeInterval(1, 0, 100),
        GenomeInterval(1, 200, 300),
        GenomeInterval(0, 350, 400),
        GenomeInterval(1, 0, 50, True),
        GenomeInterval(1, 0, 50, True),
    ]
    path = [0, 1, 6, 7, 2, 3, 8, 9, 4, 5]
    truth = [(130, 170), (355, 395)]
    check(path, blocks, truth)
def get_gap_overlap_positions(path, blocks, read_len, min_mappable=20):
    """Return open intervals of path positions tied to gap-only windows.

    The path is expanded with ``genome_blocks_gaps(blocks, path)`` and laid
    out left to right starting at position 0.  Two coverage sets are built:
    every non-empty segment that is not an insertion, and the subset of
    those that is also not a gap.  From them the function derives

    * the starts ``i`` whose window ``[i, i + min_mappable)`` fits inside the
      non-insertion coverage, and
    * those of these starts whose window also overlaps a non-gap segment.

    Both sets are stretched ``read_len - min_mappable`` to the left, and the
    second is subtracted from the first.

    Args:
        path: node path handed to ``genome_blocks_gaps``.
        blocks: block list handed to ``genome_blocks_gaps``.
        read_len: read length used for the leftward stretch.
        min_mappable: window length ``m`` (default 20).

    Returns:
        A ``pyinter.IntervalSet`` of open intervals.  Closed endpoints of the
        difference are moved outward by one, and intervals ``(a, b)`` with
        ``a >= b - 1`` are dropped (presumably because positions are
        integers, so such an interval holds no position -- confirm).
    """
    window = min_mappable
    segments = genome_blocks_gaps(blocks, path)

    non_insertion = pyinter.IntervalSet()   # every non-insertion segment
    non_gap = pyinter.IntervalSet()         # non-insertion and non-gap only
    offset = 0
    for segment in segments:
        seg_len = len(segment)
        if seg_len == 0:
            continue
        if not segment.is_insertion():
            non_insertion.add(pyinter.closedopen(offset, offset + seg_len))
            if not segment.is_gap:
                non_gap.add(pyinter.closedopen(offset, offset + seg_len))
        offset += seg_len

    # i: [i, i+m) contained in the non-insertion coverage
    fits_inside = pyinter.IntervalSet()
    for span in non_insertion:
        last_start = span.upper_value - window
        if span.lower_value <= last_start:
            fits_inside.add(pyinter.closed(span.lower_value, last_start))

    # i: [i, i+m) overlaps the non-gap coverage
    touches_non_gap = pyinter.IntervalSet()
    for span in non_gap:
        touches_non_gap.add(
            pyinter.closed(span.lower_value - window + 1,
                           span.upper_value - 1))

    fits_and_touches = fits_inside.intersection(touches_non_gap)

    # Stretch both sets to the left by read_len - m.
    stretch = read_len - window
    stretched_fits = pyinter.IntervalSet()
    for span in fits_inside:
        stretched_fits.add(
            pyinter.closed(span.lower_value - stretch, span.upper_value))
    stretched_touching = pyinter.IntervalSet()
    for span in fits_and_touches:
        stretched_touching.add(
            pyinter.closed(span.lower_value - stretch, span.upper_value))

    remainder = stretched_fits.difference(stretched_touching)

    # Normalise every piece to an open interval and remove any empty ones.
    out = pyinter.IntervalSet()
    for span in remainder:
        if span.lower_value in span:
            lo = span.lower_value - 1
        else:
            lo = span.lower_value
        if span.upper_value in span:
            hi = span.upper_value + 1
        else:
            hi = span.upper_value
        if lo < hi - 1:
            out.add(pyinter.open(lo, hi))
    return out
def get_insertion_overlap_positions(path, blocks, read_len, min_mappable=20):
    """Return open intervals of read starts derived from inserted segments.

    The path is expanded with ``genome_blocks_gaps(blocks, path)`` and laid
    out left to right starting at position 0.  Every segment for which
    ``is_insertion()`` is true contributes the open window
    ``(pos - m, pos + len)``; after those windows are merged by the interval
    set, each is shortened on the right by ``read_len - m`` and kept only if
    it is still non-degenerate.

    Args:
        path: node path handed to ``genome_blocks_gaps``.
        blocks: block list handed to ``genome_blocks_gaps``.
        read_len: read length ``R``.
        min_mappable: margin ``m`` (default 20).

    Returns:
        A triple ``(intervals, overlapping_d, overlapping_t)``.
        ``intervals`` is the ``pyinter.IntervalSet`` described above.  The
        two lists hold one bool per interval, in the iteration order of
        ``intervals``, telling whether it overlaps an interval contributed
        by an ``is_de_novo`` segment or an ``is_translocation`` segment
        respectively.
    """
    margin = min_mappable
    de_novo_starts = pyinter.IntervalSet()
    translocation_starts = pyinter.IntervalSet()
    window_starts = pyinter.IntervalSet()

    offset = 0
    for segment in genome_blocks_gaps(blocks, path):
        seg_len = len(segment)
        # Per-segment interval is only non-degenerate when this holds.
        long_enough = 0 < seg_len - read_len + 2 * margin
        if segment.is_de_novo and long_enough:
            de_novo_starts.add(
                pyinter.open(offset - margin,
                             offset + seg_len - read_len + margin))
        elif segment.is_translocation and long_enough:
            translocation_starts.add(
                pyinter.open(offset - margin,
                             offset + seg_len - read_len + margin))
        if segment.is_insertion():
            window_starts.add(pyinter.open(offset - margin, offset + seg_len))
        offset += seg_len

    # The detour through window_starts is required so that neighbouring
    # intervals merge properly before they are shortened.
    shorten_by = read_len - margin
    invalid_read_start = pyinter.IntervalSet()
    for window in window_starts:
        lower = window.lower_value
        upper = window.upper_value - shorten_by
        if lower < upper:
            invalid_read_start.add(pyinter.open(lower, upper))

    overlapping_d = []
    overlapping_t = []
    for candidate in invalid_read_start:
        overlapping_d.append(
            any(d.overlaps(candidate) for d in de_novo_starts))
        overlapping_t.append(
            any(t.overlaps(candidate) for t in translocation_starts))
    return invalid_read_start, overlapping_d, overlapping_t
def find_potential_solutions(self, range_begin: date,
                             range_end: date) -> Set[GroupAvailability]:
    """Collect the group availabilities accepted by ``is_potential_solution``.

    The availability intervals of every entry in
    ``personinclasstemplate_set`` and ``resourceinclasstemplate_set`` are
    turned into start/end events and processed in timestamp order.  After
    each event the intervals currently open are intersected, wrapped in a
    ``GroupAvailability`` together with their entries, and kept when
    ``self.is_potential_solution`` approves.

    Args:
        range_begin: first bound passed to ``get_availability``.
        range_end: second bound passed to ``get_availability``.

    Returns:
        The set of accepted ``GroupAvailability`` objects.
    """
    EiCT = EntityInClassTemplate

    class Event(object):
        """One endpoint (lower or upper) of an availability interval."""

        def __init__(self, timestamp: TimeStamp, interval: Interval,
                     islower: bool, eict: EiCT):
            self.timestamp = timestamp
            self.interval = interval
            self.islower = islower
            self.eict = eict

    # Gather every involved entity: person entries first, then resources.
    eicts = list(self.personinclasstemplate_set.all())  # type: List[EiCT]
    eicts += self.resourceinclasstemplate_set.all()

    # Emit a lower and an upper event per availability interval.
    # NOTE(review): availability is read through `eict.person` for every
    # entry, including those from resourceinclasstemplate_set -- confirm
    # that resource entries expose `.person`.
    events = []  # type: List[Event]
    for eict in eicts:
        availability = eict.person.get_availability(
            range_begin, range_end)  # type: IntervalSet
        for ival in availability:  # type: Interval
            events.append(Event(ival.lower_value, ival, True, eict))
            events.append(Event(ival.upper_value, ival, False, eict))
    events.sort(key=lambda evt: evt.timestamp)

    # Sweep over the events, tracking which (entity, interval) pairs are
    # open at each point.
    results = set()  # type: Set[GroupAvailability]
    active = set()
    for event in events:  # type: Event
        member = (event.eict, event.interval)
        if event.islower:
            active.add(member)
        else:
            active.remove(member)

        # Intersect everything that is currently open, starting from the
        # unbounded interval.
        # NOTE(review): if `intersect` can return None for disjoint
        # intervals, the next iteration would fail -- confirm.
        timespan = open(NEGATIVE_INFINITY, INFINITY)  # type: Interval
        members = []
        for active_eict, active_ival in active:
            timespan = timespan.intersect(active_ival)
            members.append(active_eict)

        ga = GroupAvailability(members, timespan)
        if self.is_potential_solution(ga):
            results.add(ga)
    return results
def test_get_gap_overlap_positions():
    """Compare get_gap_overlap_positions with hand-written open intervals.

    ``min_mappable`` is left at the function's default in every call.
    """
    read_len = 50

    def check(path, blocks, expected_pairs):
        # Run the function under test first, then build the expected set of
        # open intervals and compare the two.
        observed = get_gap_overlap_positions(path, blocks, read_len)
        expected = pyinter.IntervalSet()
        for pair in expected_pairs:
            expected.add(pyinter.open(pair[0], pair[1]))
        print('truth: {0}\nresult: {1}\n'.format(expected, observed))
        assert (observed == expected)

    # Scenario 1: two orderings of the same five blocks.
    blocks = [
        GenomeInterval(1, 0, 100),
        GenomeInterval(1, 100, 200),
        GenomeInterval(1, 249, 300),
        GenomeInterval(1, 350, 400),
        GenomeInterval(1, 500, 600),
    ]
    paths = (
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        [0, 1, 2, 3, 4, 5, 7, 6, 8, 9],
    )
    truth = (
        [(299, 301), (399, 451)],
        [(299, 326), (424, 451)],
    )
    for path, expected_pairs in zip(paths, truth):
        check(path, blocks, expected_pairs)

    # Scenario 2: a path that visits nodes 6-7 and 8-9 (the two blocks built
    # with a fourth argument of True) between the other blocks.
    blocks = [
        GenomeInterval(1, 0, 100),
        GenomeInterval(1, 200, 300),
        GenomeInterval(0, 350, 400),
        GenomeInterval(1, 0, 50, True),
        GenomeInterval(1, 0, 50, True),
    ]
    path = [0, 1, 6, 7, 2, 3, 8, 9, 4, 5]
    truth = [(99, 131), (169, 201), (349, 356), (394, 401)]
    check(path, blocks, truth)