Example #1
0
 def test_lt(self):
     """Check ``<`` / ``>`` between the fixture positions and two ad-hoc ones."""
     first = self.position1
     second = self.position2
     # Both directions of the same comparison must agree.
     self.assertLess(second, first)
     self.assertGreater(first, second)
     self.assertLess(first, self.position3)
     on_chr5 = Position(chromosome="chr5", coordinate=5, strand=Strand.FORWARD)
     on_chr10 = Position(chromosome="chr10", coordinate=1, strand=Strand.REVERSE)
     self.assertLess(first, self.position4)
     # NOTE(review): presumably guards against plain lexicographic ordering
     # of chromosome names ("chr10" < "chr5" as strings) -- confirm.
     self.assertLess(on_chr5, on_chr10)
Example #2
0
 def test_creation(self):
     """Segment creation must raise ValueError for each invalid end position.

     Pairing ``position1`` with ``position2`` is expected to succeed.
     """
     start = self.position1
     invalid_ends = (
         Position(chromosome="chr1", coordinate=1, strand=Strand.FORWARD),
         Position(chromosome="chr2", coordinate=2, strand=Strand.FORWARD),
         Position(chromosome="chr1", coordinate=0, strand=Strand.FORWARD),
     )
     for bad_end in invalid_ends:
         self.assertRaises(ValueError, Segment, start_position=start, end_position=bad_end)
     # A valid pair must construct without raising.
     Segment(start_position=start, end_position=self.position2)
Example #3
0
 def adjacencies_mergeable(adjacency1, adjacency2, max_distance=500, separate_distances=True):
     """Tell whether two adjacencies lie within ``max_distance`` of each other.

     Distances come from ``Position.non_hap_distance_strand_specific`` and are
     evaluated for both ways of pairing the adjacencies' positions:
     "straight" (position1 with position1, position2 with position2) and
     "crossed" (position2 with position1, position1 with position2).

     With ``separate_distances`` true, one pairing must have both of its
     distances within ``max_distance``.  Otherwise the smaller of the two
     per-pairing sums is compared against ``max_distance``.
     """
     measure = Position.non_hap_distance_strand_specific
     a_first, a_second = adjacency1.position1, adjacency1.position2
     b_first, b_second = adjacency2.position1, adjacency2.position2
     # All four distances are computed up front, whichever mode is used.
     straight = (
         measure(pos1=a_first, pos2=b_first),
         measure(pos1=a_second, pos2=b_second),
     )
     crossed = (
         measure(pos1=a_second, pos2=b_first),
         measure(pos1=a_first, pos2=b_second),
     )
     if separate_distances:
         straight_ok = straight[0] <= max_distance and straight[1] <= max_distance
         crossed_ok = crossed[0] <= max_distance and crossed[1] <= max_distance
         return straight_ok or crossed_ok
     straight_total = straight[0] + straight[1]
     crossed_total = crossed[0] + crossed[1]
     return min(straight_total, crossed_total) <= max_distance
Example #4
0
 def test_eq(self):
     """Check ``==`` / ``!=`` against other positions, a string and an equal copy."""
     reference = self.position1
     not_a_position = "?"
     same_as_reference = Position(chromosome="chr1", coordinate=1, strand=Strand.FORWARD)
     # Fixture attributes are looked up one at a time, right before each check.
     for attr_name in ("position2", "position3", "position4"):
         self.assertNotEqual(reference, getattr(self, attr_name))
     self.assertNotEqual(reference, not_a_position)
     self.assertEqual(reference, same_as_reference)
Example #5
0
 def get_merged_adjacencies(self, merged_template="{cnt}_merged"):
     """Build one ``Adjacency`` per entry of ``self.merged_position_pairs``.

     Each entry is a pair of ``(chromosome, strand, coordinate)`` triples.
     For both triples the adjacency ids are fetched through
     ``self.get_adjs_ids_by_position``; the ids common to both sides are
     recorded as the origins of the merged adjacency.

     Args:
         merged_template: format string with a ``{cnt}`` placeholder; it is
             filled with the 0-based index of the pair and stored under
             ``EXTERNAL_NA_ID``.

     Returns:
         List of ``Adjacency`` objects, in the order of
         ``self.merged_position_pairs``.  The ``self.origin_ids_field`` entry
         of ``extra`` holds the common ids, sorted and comma-joined so the
         value is reproducible between runs.

     Raises:
         AssertionError: if a pair has no common ids, or shares an id with
             a previously processed pair.
     """
     result = []
     processed_ids = set()
     for cnt, (lp, rp) in enumerate(self.merged_position_pairs):
         lp_chr, lp_strand, lp_coord = lp
         rp_chr, rp_strand, rp_coord = rp
         lp_ids = self.get_adjs_ids_by_position(chromosome=lp_chr, strand=lp_strand, coordinate=lp_coord)
         rp_ids = self.get_adjs_ids_by_position(chromosome=rp_chr, strand=rp_strand, coordinate=rp_coord)
         common_ids = lp_ids & rp_ids
         # Internal invariants of the merging bookkeeping, not input validation.
         assert len(common_ids) > 0, "merged position pair has no common adjacency ids"
         assert len(common_ids & processed_ids) == 0, "adjacency id belongs to several merged pairs"
         processed_ids.update(common_ids)
         pos1 = Position(chromosome=lp_chr, coordinate=lp_coord, strand=lp_strand)
         pos2 = Position(chromosome=rp_chr, coordinate=rp_coord, strand=rp_strand)
         adj = Adjacency(position1=pos1, position2=pos2, extra={
             EXTERNAL_NA_ID: merged_template.format(cnt=cnt),
             # Set iteration order is arbitrary; sort for a stable string.
             self.origin_ids_field: ",".join(sorted(common_ids)),
         })
         result.append(adj)
     return result
Example #6
0
    def setUp(self):
        """Create twelve positions and six segments on chromosomes "1" and "2".

        On each chromosome the coordinates run 1..6, with odd coordinates on
        the reverse strand and even ones on the forward strand; consecutive
        positions are paired into segments.
        """
        fwd = Strand.FORWARD
        rev = Strand.REVERSE

        # Chromosome "1".
        self.p1 = Position(chromosome="1", coordinate=1, strand=rev)
        self.p2 = Position(chromosome="1", coordinate=2, strand=fwd)
        self.p3 = Position(chromosome="1", coordinate=3, strand=rev)
        self.p4 = Position(chromosome="1", coordinate=4, strand=fwd)
        self.p5 = Position(chromosome="1", coordinate=5, strand=rev)
        self.p6 = Position(chromosome="1", coordinate=6, strand=fwd)

        # Chromosome "2".
        self.p7 = Position(chromosome="2", coordinate=1, strand=rev)
        self.p8 = Position(chromosome="2", coordinate=2, strand=fwd)
        self.p9 = Position(chromosome="2", coordinate=3, strand=rev)
        self.p10 = Position(chromosome="2", coordinate=4, strand=fwd)
        self.p11 = Position(chromosome="2", coordinate=5, strand=rev)
        self.p12 = Position(chromosome="2", coordinate=6, strand=fwd)

        # Segments on chromosome "1".
        self.s1 = Segment(start_position=self.p1, end_position=self.p2)
        self.s2 = Segment(start_position=self.p3, end_position=self.p4)
        self.s3 = Segment(start_position=self.p5, end_position=self.p6)

        # Segments on chromosome "2".
        self.s4 = Segment(start_position=self.p7, end_position=self.p8)
        self.s5 = Segment(start_position=self.p9, end_position=self.p10)
        self.s6 = Segment(start_position=self.p11, end_position=self.p12)
Example #7
0
 def setUp(self):
     """Create two fixture positions on chr1: 1/reverse and 2/forward."""
     self.position1 = Position(
         chromosome="chr1",
         coordinate=1,
         strand=Strand.REVERSE,
     )
     self.position2 = Position(
         chromosome="chr1",
         coordinate=2,
         strand=Strand.FORWARD,
     )
Example #8
0
 def test_empty_extra_creation(self):
     """A Position created without ``extra`` must expose an empty dict."""
     bare_position = Position(chromosome="chrom1", coordinate=1, strand=Strand.FORWARD)
     self.assertDictEqual(bare_position.extra, {})
Example #9
0
def refined_segments(segments,
                     additional_positions=None,
                     additional_positions_by_chrs=None):
    """Cut segments at extra positions and map source ids to refined ids.

    The input segments are deep-copied, grouped by chromosome and sorted by
    ``(start_coordinate, end_coordinate)``.  Every additional position that
    falls inside a segment splits it: a ``Strand.FORWARD`` position cuts
    right after its coordinate, any other strand cuts right before it.  The
    left piece ends on a ``Strand.FORWARD`` position and the right piece
    starts on a ``Strand.REVERSE`` position one coordinate further.
    Positions that coincide with an existing boundary of the current piece
    (reverse strand at its start, forward strand at its end) are skipped.

    Args:
        segments: segments to refine; each must expose ``chromosome``,
            ``start_coordinate``, ``end_coordinate``, ``start_position``,
            ``end_position`` and ``stable_id_non_hap``.
        additional_positions: optional iterable of positions (objects with
            ``chromosome``, ``coordinate`` and ``strand``) to cut at.
        additional_positions_by_chrs: optional mapping of chromosome name to
            a list of positions.  It is only read: its content is copied
            into a private mapping, so a plain ``dict`` is accepted and the
            caller's object is never modified.

    Returns:
        A tuple ``(refined, ids_mapping)``.  ``refined`` is the list of
        resulting segments.  ``ids_mapping`` is a ``defaultdict(list)``
        keyed by ``stable_id_non_hap``: a source segment's id lists the ids
        of the pieces made from it, and each piece id lists its source id.

    Raises:
        ValueError: if ``sorted_segments_donot_overlap`` rejects the sorted
            segments of some chromosome.
    """
    fragments = deepcopy(segments)
    if additional_positions is None:
        additional_positions = []
    # Gather all cut positions in a mapping owned by this call, so that the
    # caller's ``additional_positions_by_chrs`` is neither extended, nor
    # re-sorted, nor required to be a defaultdict.
    positions_by_chrs = defaultdict(list)
    if additional_positions_by_chrs is not None:
        for chr_name, given_positions in additional_positions_by_chrs.items():
            positions_by_chrs[chr_name].extend(given_positions)
    for position in additional_positions:
        positions_by_chrs[position.chromosome].append(position)
    result_segments = []
    segments_ids_mapping = defaultdict(list)
    source_fragments_by_chrs = defaultdict(list)
    for fragment in fragments:
        source_fragments_by_chrs[fragment.chromosome].append(fragment)
    for chr_name in list(source_fragments_by_chrs.keys()):
        source_fragments_by_chrs[chr_name] = sorted(
            source_fragments_by_chrs[chr_name],
            key=lambda s: (s.start_coordinate, s.end_coordinate))
        if not sorted_segments_donot_overlap(
                segments=source_fragments_by_chrs[chr_name]):
            raise ValueError(
                "Some segments overlap on chromosome {chr_name}.".format(
                    chr_name=chr_name))
    for chr_positions_list in positions_by_chrs.values():
        chr_positions_list.sort(key=lambda p: (p.coordinate, p.strand))
    for chr_name in source_fragments_by_chrs:
        # Walk the sorted fragments and the sorted positions in lock-step.
        chr_fragments = iter(source_fragments_by_chrs[chr_name])
        chr_positions = iter(positions_by_chrs[chr_name])
        current_fragment = next(chr_fragments, None)
        current_position = next(chr_positions, None)
        # ``current_fragment`` stays the untouched source segment, while
        # ``current_segment`` is the piece that is currently being trimmed.
        current_segment = deepcopy(current_fragment)
        result_segments.append(current_segment)
        new_to_old = []
        while current_fragment is not None and current_position is not None:
            if current_position.coordinate < current_fragment.start_coordinate:
                # Position lies before the fragment: nothing to cut.
                current_position = next(chr_positions, None)
            elif current_position.coordinate == current_segment.start_coordinate and current_position.strand == Strand.REVERSE:
                # Cut would coincide with the piece's own start boundary.
                current_position = next(chr_positions, None)
            elif current_position.coordinate == current_segment.end_coordinate and current_position.strand == Strand.FORWARD:
                # Cut would coincide with the piece's own end boundary.
                current_position = next(chr_positions, None)
            elif current_position.coordinate <= current_fragment.end_coordinate:
                if current_position.strand == Strand.FORWARD:
                    left_partition_coordinate = current_position.coordinate
                else:
                    left_partition_coordinate = current_position.coordinate - 1
                right_partition_coordinate = left_partition_coordinate + 1

                new_end_position = Position(
                    chromosome=chr_name,
                    coordinate=left_partition_coordinate,
                    strand=Strand.FORWARD)
                new_start_position = Position(
                    chromosome=chr_name,
                    coordinate=right_partition_coordinate,
                    strand=Strand.REVERSE)
                # Close the left piece, then open the right one as a fresh
                # copy of the source fragment with a moved start.
                current_segment.end_position = new_end_position
                new_to_old.append(current_segment.stable_id_non_hap)
                current_segment = deepcopy(current_fragment)
                current_segment.start_position = new_start_position
                result_segments.append(current_segment)
                current_position = next(chr_positions, None)
            elif current_position.coordinate > current_fragment.end_coordinate:
                # Fragment is finished: record the id links and move on.
                new_to_old.append(current_segment.stable_id_non_hap)
                current_fragment_id = current_fragment.stable_id_non_hap
                for sid in new_to_old:
                    segments_ids_mapping[current_fragment_id].append(sid)
                    segments_ids_mapping[sid].append(current_fragment_id)
                current_fragment = next(chr_fragments, None)
                current_segment = deepcopy(current_fragment)
                if current_fragment is not None:
                    result_segments.append(current_segment)
                new_to_old = []
            else:
                raise ValueError("Something went wrong")
        if current_fragment is not None and current_segment is not None:
            # Positions ran out while a fragment was still open.
            new_to_old.append(current_segment.stable_id_non_hap)
            current_fragment_id = current_fragment.stable_id_non_hap
            for sid in new_to_old:
                segments_ids_mapping[current_fragment_id].append(sid)
                segments_ids_mapping[sid].append(current_fragment_id)
        # Remaining fragments were not reached by any position: copy them
        # through and map each id to itself.
        current_fragment = next(chr_fragments, None)
        current_segment = deepcopy(current_fragment)
        while current_fragment is not None:
            sid = current_segment.stable_id_non_hap
            segments_ids_mapping[sid].append(sid)
            result_segments.append(current_segment)
            current_fragment = next(chr_fragments, None)
            current_segment = deepcopy(current_fragment)
    return result_segments, segments_ids_mapping