def test_lt(self):
    """Check the ordering of positions via ``<`` and ``>``.

    Compares the fixture positions against each other and additionally
    asserts that a position on "chr5" sorts before a position on "chr10".
    """
    base = self.position1
    self.assertLess(self.position2, base)
    self.assertGreater(base, self.position2)
    self.assertLess(base, self.position3)
    # Locally built positions exercise the cross-chromosome comparison.
    on_chr5 = Position(chromosome="chr5", coordinate=5, strand=Strand.FORWARD)
    on_chr10 = Position(chromosome="chr10", coordinate=1, strand=Strand.REVERSE)
    self.assertLess(base, self.position4)
    self.assertLess(on_chr5, on_chr10)
def test_creation(self):
    """Check which end positions ``Segment`` accepts for ``self.position1``.

    Each of the three locally built FORWARD positions must make the
    ``Segment`` constructor raise ``ValueError``; pairing ``self.position1``
    with ``self.position2`` must construct without raising.
    """
    rejected_end_specs = (("chr1", 1), ("chr2", 2), ("chr1", 0))
    rejected_ends = [
        Position(chromosome=chromosome, coordinate=coordinate, strand=Strand.FORWARD)
        for chromosome, coordinate in rejected_end_specs
    ]
    for bad_end in rejected_ends:
        with self.assertRaises(ValueError):
            Segment(start_position=self.position1, end_position=bad_end)
    # The fixture pair is expected to be a valid segment (no exception).
    Segment(start_position=self.position1, end_position=self.position2)
def adjacencies_mergeable(adjacency1, adjacency2, max_distance=500, separate_distances=True):
    """Tell whether two adjacencies lie within ``max_distance`` of each other.

    The ends of the adjacencies are matched in two ways: "straight"
    (position1 with position1, position2 with position2) and "crossed"
    (position2 with position1, position1 with position2). Distances come
    from ``Position.non_hap_distance_strand_specific``.

    :param adjacency1: first adjacency (provides ``position1``/``position2``).
    :param adjacency2: second adjacency (provides ``position1``/``position2``).
    :param max_distance: upper bound (inclusive) on the allowed distance.
    :param separate_distances: when true, both distances of a matching must
        individually be within ``max_distance``; when false, the smaller of
        the two per-matching distance sums must be within ``max_distance``.
    :return: truth value of the mergeability check.
    """
    measure = Position.non_hap_distance_strand_specific
    # All four distances are computed up front, in a fixed order.
    straight_first = measure(pos1=adjacency1.position1, pos2=adjacency2.position1)
    straight_second = measure(pos1=adjacency1.position2, pos2=adjacency2.position2)
    crossed_first = measure(pos1=adjacency1.position2, pos2=adjacency2.position1)
    crossed_second = measure(pos1=adjacency1.position1, pos2=adjacency2.position2)

    if separate_distances:
        if straight_first <= max_distance and straight_second <= max_distance:
            return True
        return crossed_first <= max_distance and crossed_second <= max_distance

    straight_total = straight_first + straight_second
    crossed_total = crossed_first + crossed_second
    return min(straight_total, crossed_total) <= max_distance
def test_eq(self):
    """Check equality semantics of ``Position``.

    ``self.position1`` must differ from the other fixture positions and from
    a non-``Position`` object, and must equal a freshly built position with
    chromosome "chr1", coordinate 1 and FORWARD strand.
    """
    twin = Position(chromosome="chr1", coordinate=1, strand=Strand.FORWARD)
    unequal_candidates = (self.position2, self.position3, self.position4, "?")
    for candidate in unequal_candidates:
        self.assertNotEqual(self.position1, candidate)
    self.assertEqual(self.position1, twin)
def get_merged_adjacencies(self, merged_template="{cnt}_merged"):
    """Build one merged ``Adjacency`` per entry of ``self.merged_position_pairs``.

    Every entry is a pair of ``(chromosome, strand, coordinate)`` triples.
    For each pair, the ids returned by ``self.get_adjs_ids_by_position`` for
    both triples are intersected; that intersection is recorded on the
    resulting adjacency as the ids it originates from.

    :param merged_template: format string with a ``{cnt}`` placeholder; it is
        formatted with the 0-based index of the pair and stored in the
        adjacency's ``extra`` under ``EXTERNAL_NA_ID``.
    :return: list of ``Adjacency`` objects, in the order of
        ``self.merged_position_pairs``.
    :raises AssertionError: if a pair has no shared ids, or shares an id with
        a previously processed pair.
    """
    result = []
    processed_ids = set()
    for cnt, (lp, rp) in enumerate(self.merged_position_pairs):
        lp_chr, lp_strand, lp_coord = lp
        rp_chr, rp_strand, rp_coord = rp
        lp_ids = self.get_adjs_ids_by_position(chromosome=lp_chr, strand=lp_strand, coordinate=lp_coord)
        rp_ids = self.get_adjs_ids_by_position(chromosome=rp_chr, strand=rp_strand, coordinate=rp_coord)
        common_ids = lp_ids & rp_ids
        # Internal consistency checks: every merged pair must be backed by at
        # least one id, and no id may back two different merged pairs.
        assert len(common_ids) > 0
        assert len(common_ids & processed_ids) == 0
        processed_ids.update(common_ids)
        pos1 = Position(chromosome=lp_chr, coordinate=lp_coord, strand=lp_strand)
        pos2 = Position(chromosome=rp_chr, coordinate=rp_coord, strand=rp_strand)
        adj = Adjacency(position1=pos1, position2=pos2, extra={
            EXTERNAL_NA_ID: merged_template.format(cnt=cnt),
            # Sorted so that the joined string does not depend on the
            # arbitrary iteration order of the id collection.
            self.origin_ids_field: ",".join(sorted(common_ids)),
        })
        result.append(adj)
    return result
def setUp(self):
    """Create twelve positions (``p1``..``p12``) and six segments (``s1``..``s6``).

    Positions ``p1``..``p6`` are on chromosome "1" and ``p7``..``p12`` on
    chromosome "2", with coordinates 1..6 on each; odd coordinates use the
    REVERSE strand and even coordinates the FORWARD strand. Segment ``sK``
    spans the consecutive pair ``p(2K-1)``..``p(2K)``.
    """
    def reverse_at(chromosome, coordinate):
        return Position(chromosome=chromosome, coordinate=coordinate, strand=Strand.REVERSE)

    def forward_at(chromosome, coordinate):
        return Position(chromosome=chromosome, coordinate=coordinate, strand=Strand.FORWARD)

    def span(start, end):
        return Segment(start_position=start, end_position=end)

    # Chromosome "1".
    self.p1 = reverse_at("1", 1)
    self.p2 = forward_at("1", 2)
    self.p3 = reverse_at("1", 3)
    self.p4 = forward_at("1", 4)
    self.p5 = reverse_at("1", 5)
    self.p6 = forward_at("1", 6)
    # Chromosome "2".
    self.p7 = reverse_at("2", 1)
    self.p8 = forward_at("2", 2)
    self.p9 = reverse_at("2", 3)
    self.p10 = forward_at("2", 4)
    self.p11 = reverse_at("2", 5)
    self.p12 = forward_at("2", 6)

    self.s1 = span(self.p1, self.p2)
    self.s2 = span(self.p3, self.p4)
    self.s3 = span(self.p5, self.p6)
    self.s4 = span(self.p7, self.p8)
    self.s5 = span(self.p9, self.p10)
    self.s6 = span(self.p11, self.p12)
def setUp(self):
    """Create two positions on "chr1": coordinate 1 (REVERSE) and 2 (FORWARD)."""
    chromosome = "chr1"
    self.position1 = Position(chromosome=chromosome, coordinate=1, strand=Strand.REVERSE)
    self.position2 = Position(chromosome=chromosome, coordinate=2, strand=Strand.FORWARD)
def test_empty_extra_creation(self):
    """A ``Position`` built without ``extra`` must expose an empty dict as ``extra``."""
    position = Position(chromosome="chrom1", coordinate=1, strand=Strand.FORWARD)
    self.assertDictEqual(position.extra, {})
def refined_segments(segments, additional_positions=None, additional_positions_by_chrs=None):
    """Split segments at the supplied positions and report how ids relate.

    The input segments are deep-copied, grouped by chromosome and sorted by
    ``(start_coordinate, end_coordinate)``. Positions are grouped by
    chromosome and sorted by ``(coordinate, strand)``. Each chromosome's
    segments are then walked together with its positions, and a segment is
    cut wherever a position falls inside it:

    * a FORWARD position at coordinate ``c`` cuts between ``c`` and ``c + 1``;
    * any other position at coordinate ``c`` cuts between ``c - 1`` and ``c``.

    Positions that lie before the current source segment, a REVERSE position
    on the current piece's start, and a FORWARD position on the current
    piece's end do not cut anything. Positions on chromosomes that have no
    segments are ignored.

    Neither ``segments`` nor ``additional_positions_by_chrs`` is modified;
    the latter may be any mapping (a plain ``dict`` is fine).

    :param segments: iterable of segments to refine.
    :param additional_positions: optional iterable of positions to cut at.
    :param additional_positions_by_chrs: optional mapping
        ``chromosome -> iterable of positions`` with further cut positions.
    :return: tuple ``(segments, ids_mapping)`` where ``segments`` is the list
        of refined segments and ``ids_mapping`` is a ``defaultdict(list)``
        relating ``stable_id_non_hap`` of each source segment to the ids of
        its pieces and each piece id back to its source segment id.
    :raises ValueError: if segments on one chromosome overlap.
    """
    fragments = deepcopy(segments)
    if additional_positions is None:
        additional_positions = []

    # Collect all cut positions in a private mapping so that the caller's
    # mapping is left untouched and does not need to be a defaultdict.
    positions_by_chrs = defaultdict(list)
    if additional_positions_by_chrs is not None:
        for chr_name, chr_positions in additional_positions_by_chrs.items():
            positions_by_chrs[chr_name].extend(chr_positions)
    for position in additional_positions:
        positions_by_chrs[position.chromosome].append(position)

    result_segments = []
    segments_ids_mapping = defaultdict(list)
    source_fragments_by_chrs = defaultdict(list)
    for fragment in fragments:
        source_fragments_by_chrs[fragment.chromosome].append(fragment)

    for chr_name in list(source_fragments_by_chrs.keys()):
        source_fragments_by_chrs[chr_name] = sorted(
            source_fragments_by_chrs[chr_name],
            key=lambda s: (s.start_coordinate, s.end_coordinate))
        if not sorted_segments_donot_overlap(
                segments=source_fragments_by_chrs[chr_name]):
            raise ValueError(
                "Some segments overlap on chromosome {chr_name}.".format(
                    chr_name=chr_name))
    for chr_name in list(positions_by_chrs.keys()):
        positions_by_chrs[chr_name] = sorted(
            positions_by_chrs[chr_name],
            key=lambda p: (p.coordinate, p.strand))

    for chr_name in source_fragments_by_chrs:
        chr_fragments = iter(source_fragments_by_chrs[chr_name])
        chr_positions = iter(positions_by_chrs.get(chr_name, ()))
        # current_fragment: the untouched source segment being processed.
        # current_segment: the (possibly already shortened) piece of it that
        # is currently open on the right.
        current_fragment = next(chr_fragments, None)
        current_position = next(chr_positions, None)
        current_segment = deepcopy(current_fragment)
        result_segments.append(current_segment)
        # Ids of the pieces produced so far from current_fragment.
        new_to_old = []
        while current_fragment is not None and current_position is not None:
            if current_position.coordinate < current_fragment.start_coordinate:
                # Position lies before the fragment: nothing to cut.
                current_position = next(chr_positions, None)
            elif current_position.coordinate == current_segment.start_coordinate and current_position.strand == Strand.REVERSE:
                # Already a left boundary of the current piece.
                current_position = next(chr_positions, None)
            elif current_position.coordinate == current_segment.end_coordinate and current_position.strand == Strand.FORWARD:
                # Already a right boundary of the current piece.
                current_position = next(chr_positions, None)
            elif current_position.coordinate <= current_fragment.end_coordinate:
                # Position falls inside the fragment: close the current piece
                # and open a new one right after the cut.
                if current_position.strand == Strand.FORWARD:
                    left_partition_coordinate = current_position.coordinate
                else:
                    left_partition_coordinate = current_position.coordinate - 1
                right_partition_coordinate = left_partition_coordinate + 1
                new_end_position = Position(
                    chromosome=chr_name,
                    coordinate=left_partition_coordinate,
                    strand=Strand.FORWARD)
                new_start_position = Position(
                    chromosome=chr_name,
                    coordinate=right_partition_coordinate,
                    strand=Strand.REVERSE)
                current_segment.end_position = new_end_position
                new_to_old.append(current_segment.stable_id_non_hap)
                current_segment = deepcopy(current_fragment)
                current_segment.start_position = new_start_position
                result_segments.append(current_segment)
                current_position = next(chr_positions, None)
            elif current_position.coordinate > current_fragment.end_coordinate:
                # Position is past the fragment: finalize the fragment's id
                # mapping and move on to the next fragment.
                new_to_old.append(current_segment.stable_id_non_hap)
                current_fragment_id = current_fragment.stable_id_non_hap
                for sid in new_to_old:
                    segments_ids_mapping[current_fragment_id].append(sid)
                    segments_ids_mapping[sid].append(current_fragment_id)
                current_fragment = next(chr_fragments, None)
                current_segment = deepcopy(current_fragment)
                if current_fragment is not None:
                    result_segments.append(current_segment)
                new_to_old = []
            else:
                raise ValueError("Something went wrong")
        # Positions ran out while a fragment was still open: finalize it.
        if current_fragment is not None and current_segment is not None:
            new_to_old.append(current_segment.stable_id_non_hap)
            current_fragment_id = current_fragment.stable_id_non_hap
            for sid in new_to_old:
                segments_ids_mapping[current_fragment_id].append(sid)
                segments_ids_mapping[sid].append(current_fragment_id)
            current_fragment = next(chr_fragments, None)
            current_segment = deepcopy(current_fragment)
        # Remaining fragments have no positions left to cut them; they are
        # emitted unchanged and mapped to themselves.
        while current_fragment is not None:
            sid = current_segment.stable_id_non_hap
            segments_ids_mapping[sid].append(sid)
            result_segments.append(current_segment)
            current_fragment = next(chr_fragments, None)
            current_segment = deepcopy(current_fragment)
    return result_segments, segments_ids_mapping