def _stranded_ends(self, hit: AMRHitHSP) -> Tuple[int, int]:
    """
    Returns the contig coordinates of a hit, ordered according to its strand.

    A hit whose strand is 'plus' yields (contig start, contig end); for any
    other strand value the two coordinates are swapped.

    :param hit: The hit.
    :return: The (start,end) as a tuple.
    """
    contig_start = hit.get_genome_contig_start()
    contig_end = hit.get_genome_contig_end()

    if hit.get_genome_contig_strand() == 'plus':
        return contig_start, contig_end

    # Non-plus strand: the reported coordinates are flipped.
    return contig_end, contig_start
def append(self, hit: AMRHitHSP) -> None:
    """
    Adds a new blast hit to the set of partitions.

    The hit joins the existing partition returned by
    ``_get_existing_partition``; when there is none, a new partition is
    created for it.

    :param hit: The hit to add.
    :raises InvalidPositionException: If the hit is on the 'plus' strand but
        its contig start is greater than its contig end.
    :return: None
    """
    contig_start = hit.get_genome_contig_start()
    contig_end = hit.get_genome_contig_end()

    # Only the plus-strand orientation is validated here.
    if contig_start > contig_end and hit.get_genome_contig_strand() == 'plus':
        message = (
            "Unsupported condition: strand=plus and contig start > contig end for hit (contig="
            + hit.get_genome_contig_id()
            + ", start=" + str(contig_start)
            + ", end=" + str(contig_end) + ")"
        )
        raise InvalidPositionException(message)

    existing = self._get_existing_partition(hit)
    if existing is not None:
        self._add_hit_partition(hit, existing)
        return

    self._create_new_parition(hit)
def _get_existing_partition(self, hit: AMRHitHSP) -> Optional[Dict[str, Any]]:
    """
    Finds the first partition on the hit's contig that accepts the hit.

    Partitions stored under the hit's contig id are checked in list order
    with ``_hit_in_parition``.

    :param hit: The hit to find a partition for.
    :return: The first matching partition, or None when the contig has no
        partitions or none of them match.
    """
    contig_name = hit.get_genome_contig_id()
    if contig_name not in self._partitions:
        return None

    candidates = self._partitions[contig_name]
    return next(
        (candidate for candidate in candidates if self._hit_in_parition(hit, candidate)),
        None,
    )
def _create_new_parition(self, hit: AMRHitHSP) -> None:
    """
    Starts a new partition seeded with the given hit and stores it under the
    hit's contig id.

    The partition is a dict with keys 'start' and 'end' (the values returned
    by ``_stranded_ends``) and 'hits' (a list holding just this hit).

    :param hit: The hit that seeds the new partition.
    :return: None
    """
    region_start, region_end = self._stranded_ends(hit)
    new_partition = {'start': region_start, 'end': region_end, 'hits': [hit]}

    # Create the contig's partition list on first use, then add to it.
    self._partitions.setdefault(hit.get_genome_contig_id(), []).append(new_partition)
def testSinglePartitionPlusFailMinusCoords(self):
    """Appending a plus-strand hit with start=10 and end=1 raises InvalidPositionException."""
    reversed_plus_hit = AMRHitHSP(None, None)
    reversed_plus_hit.get_genome_contig_id = MagicMock(return_value="contig1")
    reversed_plus_hit.get_genome_contig_start = MagicMock(return_value=10)
    reversed_plus_hit.get_genome_contig_end = MagicMock(return_value=1)
    reversed_plus_hit.get_genome_contig_strand = MagicMock(return_value='plus')

    partitions = BlastHitPartitions()

    with self.assertRaises(InvalidPositionException):
        partitions.append(reversed_plus_hit)
def testSinglePartitionMinus(self):
    """A single minus-strand hit (start=10, end=1) is returned alone in one partition, coordinates unchanged."""
    minus_hit = AMRHitHSP(None, None)
    minus_hit.get_genome_contig_id = MagicMock(return_value="contig1")
    minus_hit.get_genome_contig_start = MagicMock(return_value=10)
    minus_hit.get_genome_contig_end = MagicMock(return_value=1)
    minus_hit.get_genome_contig_strand = MagicMock(return_value='minus')

    partitions = BlastHitPartitions()
    partitions.append(minus_hit)
    regions = partitions.get_hits_nonoverlapping_regions()

    self.assertEqual(1, len(regions), "Should only be one partition")
    first_region = regions[0]
    self.assertEqual(1, len(first_region), "Should only be one hit")

    returned_hit = first_region[0]
    self.assertEqual('contig1', returned_hit.get_genome_contig_id(), "Should have correct contig name")
    self.assertEqual(10, returned_hit.get_genome_contig_start(), "Should have correct contig start")
    self.assertEqual(1, returned_hit.get_genome_contig_end(), "Should have correct contig end")
def testSinglePartitionIdenticalHitsMinusStrand(self):
    """Appending the same minus-strand hit twice yields one partition holding both entries."""
    minus_hit = AMRHitHSP(None, None)
    minus_hit.get_genome_contig_id = MagicMock(return_value="contig1")
    minus_hit.get_genome_contig_start = MagicMock(return_value=10)
    minus_hit.get_genome_contig_end = MagicMock(return_value=1)
    minus_hit.get_genome_contig_strand = MagicMock(return_value='minus')

    partitions = BlastHitPartitions()
    for _ in range(2):
        partitions.append(minus_hit)
    regions = partitions.get_hits_nonoverlapping_regions()

    self.assertEqual(1, len(regions), "Should only be one partition")
    grouped_hits = regions[0]
    self.assertEqual(2, len(grouped_hits), "Should be two hits")

    contig_names = [entry.get_genome_contig_id() for entry in grouped_hits]
    self.assertEqual(['contig1', 'contig1'], contig_names, "Should have correct contig names")
    contig_starts = [entry.get_genome_contig_start() for entry in grouped_hits]
    self.assertEqual([10, 10], contig_starts, "Should have correct contig starts")
    contig_ends = [entry.get_genome_contig_end() for entry in grouped_hits]
    self.assertEqual([1, 1], contig_ends, "Should have correct contig ends")
def testTwoPartitionsDifferentContigNames(self):
    """Two plus-strand hits with identical coordinates (1-10) on different contigs end up in separate partitions."""

    def make_plus_hit(contig_id):
        # Both hits share coordinates and strand; only the contig differs.
        stub = AMRHitHSP(None, None)
        stub.get_genome_contig_id = MagicMock(return_value=contig_id)
        stub.get_genome_contig_start = MagicMock(return_value=1)
        stub.get_genome_contig_end = MagicMock(return_value=10)
        stub.get_genome_contig_strand = MagicMock(return_value='plus')
        return stub

    partitions = BlastHitPartitions()
    partitions.append(make_plus_hit("contig1"))
    partitions.append(make_plus_hit("contig2"))
    regions = partitions.get_hits_nonoverlapping_regions()

    self.assertEqual(2, len(regions), "Should be two partitions")

    # Partitions are checked in the order the hits were appended.
    expectations = (
        (0, 'contig1', "Partition 1 should have 1 hit"),
        (1, 'contig2', "Partition 2 should have 1 hit"),
    )
    for index, expected_contig, size_message in expectations:
        region = regions[index]
        self.assertEqual(1, len(region), size_message)
        self.assertEqual([expected_contig], [entry.get_genome_contig_id() for entry in region],
                         "Should have correct contig names")
        self.assertEqual([1], [entry.get_genome_contig_start() for entry in region],
                         "Should have correct contig starts")
        self.assertEqual([10], [entry.get_genome_contig_end() for entry in region],
                         "Should have correct contig ends")
def testSinglePartitionHit2EdgeWithinHit1Greater(self):
    """Plus-strand hits at 5-11 and 10-15 on the same contig share one partition, kept in append order."""

    def make_plus_hit(contig_start, contig_end):
        # Both hits live on contig1's plus strand; only the coordinates differ.
        stub = AMRHitHSP(None, None)
        stub.get_genome_contig_id = MagicMock(return_value="contig1")
        stub.get_genome_contig_start = MagicMock(return_value=contig_start)
        stub.get_genome_contig_end = MagicMock(return_value=contig_end)
        stub.get_genome_contig_strand = MagicMock(return_value='plus')
        return stub

    partitions = BlastHitPartitions()
    partitions.append(make_plus_hit(5, 11))
    partitions.append(make_plus_hit(10, 15))
    regions = partitions.get_hits_nonoverlapping_regions()

    self.assertEqual(1, len(regions), "Should only be one partition")
    grouped_hits = regions[0]
    self.assertEqual(2, len(grouped_hits), "Should be two hits")

    contig_names = [entry.get_genome_contig_id() for entry in grouped_hits]
    self.assertEqual(['contig1', 'contig1'], contig_names, "Should have correct contig names")
    contig_starts = [entry.get_genome_contig_start() for entry in grouped_hits]
    self.assertEqual([5, 10], contig_starts, "Should have correct contig starts")
    contig_ends = [entry.get_genome_contig_end() for entry in grouped_hits]
    self.assertEqual([11, 15], contig_ends, "Should have correct contig ends")