def _handle_blast_hit(self, in_file, database_name, blast_file, results, hit_seq_records):
    """Read a BLAST output table, filter its rows and collect the selected hits.

    The table is loaded without a header row using ``BlastHandler.BLAST_COLUMNS``
    as column names. Rows are kept when ``pident`` and the computed ``plength``
    (``length / qlen * 100``) reach the configured thresholds and ``qseqid`` is
    not listed in ``self._genes_to_exclude``. The surviving rows are sorted by
    ``self.BLAST_SORT_COLUMNS``, converted to hits with ``self._create_hit`` and
    grouped by a ``BlastHitPartitions`` instance. For every group, the hits
    returned by ``self._select_hits_to_include`` are turned into result rows.

    :param in_file: Passed through to ``self._create_hit``.
    :param database_name: Passed to ``self._create_hit`` and ``self._get_result_rows``.
    :param blast_file: Path or buffer handed to ``pandas.read_table``.
    :param results: List that is extended in place with the result rows of each hit.
    :param hit_seq_records: List that receives ``hit.get_seq_record()`` for each
        hit that produced result rows.
    :return: None; ``results`` and ``hit_seq_records`` are modified in place.
    """
    # Builtin ``str`` is used for the id columns: the ``np.unicode_`` alias was
    # removed in NumPy 2.0 and both spellings yield the same string columns.
    blast_table = pd.read_table(
        blast_file,
        header=None,
        names=BlastHandler.BLAST_COLUMNS,
        index_col=False,
    ).astype(dtype={'qseqid': str, 'sseqid': str})

    # Percentage of the query length covered by the alignment.
    blast_table['plength'] = (blast_table['length'] / blast_table['qlen']) * 100.0

    passes_filters = (
        (blast_table['pident'] >= self._pid_threshold)
        & (blast_table['plength'] >= self._plength_threshold)
        & ~blast_table['qseqid'].isin(self._genes_to_exclude)
    )
    # Rebind the sorted result instead of sorting in place: an in-place sort on
    # a boolean-mask selection triggers pandas' SettingWithCopyWarning.
    blast_table = blast_table[passes_filters].sort_values(by=self.BLAST_SORT_COLUMNS)

    partitions = BlastHitPartitions()
    for _, blast_record in blast_table.iterrows():
        partitions.append(self._create_hit(in_file, database_name, blast_record))

    for hits_non_overlapping in partitions.get_hits_nonoverlapping_regions():
        for hit in self._select_hits_to_include(hits_non_overlapping):
            blast_results = self._get_result_rows(hit, database_name)
            if blast_results is None:
                # No rows for this hit, so nothing is recorded for it.
                continue
            logger.debug("record = %s", blast_results)
            results.extend(blast_results)
            hit_seq_records.append(hit.get_seq_record())
def testTwoPartitionsDifferentContigNames(self):
    """Two hits with identical coordinates on different contigs give two single-hit partitions."""

    def make_hit(contig_id, start, end, strand):
        # Replace the contig accessors with mocks returning fixed values.
        hit = AMRHitHSP(None, None)
        hit.get_genome_contig_id = MagicMock(return_value=contig_id)
        hit.get_genome_contig_start = MagicMock(return_value=start)
        hit.get_genome_contig_end = MagicMock(return_value=end)
        hit.get_genome_contig_strand = MagicMock(return_value=strand)
        return hit

    hits = [make_hit(contig_id, 1, 10, 'plus') for contig_id in ("contig1", "contig2")]

    parts = BlastHitPartitions()
    for hit in hits:
        parts.append(hit)

    return_list = parts.get_hits_nonoverlapping_regions()

    self.assertEqual(2, len(return_list), "Should be two partitions")

    # One entry per expected partition, in the order the hits were appended.
    expectations = (
        ("Partition 1 should have 1 hit", 'contig1'),
        ("Partition 2 should have 1 hit", 'contig2'),
    )
    for position, (count_message, contig_id) in enumerate(expectations):
        partition = return_list[position]
        self.assertEqual(1, len(partition), count_message)
        self.assertEqual([contig_id],
                         [entry.get_genome_contig_id() for entry in partition],
                         "Should have correct contig names")
        self.assertEqual([1],
                         [entry.get_genome_contig_start() for entry in partition],
                         "Should have correct contig starts")
        self.assertEqual([10],
                         [entry.get_genome_contig_end() for entry in partition],
                         "Should have correct contig ends")
def testSinglePartitionHit2EdgeWithinHit1Greater(self):
    """A hit at 10-15 whose start lies inside an earlier hit at 5-11 shares its partition."""

    def make_hit(contig_id, start, end, strand):
        # Replace the contig accessors with mocks returning fixed values.
        hit = AMRHitHSP(None, None)
        hit.get_genome_contig_id = MagicMock(return_value=contig_id)
        hit.get_genome_contig_start = MagicMock(return_value=start)
        hit.get_genome_contig_end = MagicMock(return_value=end)
        hit.get_genome_contig_strand = MagicMock(return_value=strand)
        return hit

    first_hit = make_hit("contig1", 5, 11, 'plus')
    second_hit = make_hit("contig1", 10, 15, 'plus')

    parts = BlastHitPartitions()
    parts.append(first_hit)
    parts.append(second_hit)

    return_list = parts.get_hits_nonoverlapping_regions()

    self.assertEqual(1, len(return_list), "Should only be one partition")
    partition = return_list[0]
    self.assertEqual(2, len(partition), "Should be two hits")

    contig_ids = [entry.get_genome_contig_id() for entry in partition]
    self.assertEqual(['contig1', 'contig1'], contig_ids, "Should have correct contig names")

    starts = [entry.get_genome_contig_start() for entry in partition]
    self.assertEqual([5, 10], starts, "Should have correct contig starts")

    ends = [entry.get_genome_contig_end() for entry in partition]
    self.assertEqual([11, 15], ends, "Should have correct contig ends")
def testSinglePartitionMinus(self):
    """A single minus-strand hit with start 10 and end 1 is returned as one partition."""
    minus_hit = AMRHitHSP(None, None)
    # Replace each contig accessor with a mock returning a fixed value.
    stubbed_values = (
        ('get_genome_contig_id', "contig1"),
        ('get_genome_contig_start', 10),
        ('get_genome_contig_end', 1),
        ('get_genome_contig_strand', 'minus'),
    )
    for accessor_name, value in stubbed_values:
        setattr(minus_hit, accessor_name, MagicMock(return_value=value))

    parts = BlastHitPartitions()
    parts.append(minus_hit)

    return_list = parts.get_hits_nonoverlapping_regions()

    self.assertEqual(1, len(return_list), "Should only be one partition")
    self.assertEqual(1, len(return_list[0]), "Should only be one hit")

    only_hit = return_list[0][0]
    self.assertEqual('contig1', only_hit.get_genome_contig_id(), "Should have correct contig name")
    self.assertEqual(10, only_hit.get_genome_contig_start(), "Should have correct contig start")
    self.assertEqual(1, only_hit.get_genome_contig_end(), "Should have correct contig end")
def testSinglePartitionPlusFailMinusCoords(self):
    """Appending a plus-strand hit whose start (10) exceeds its end (1) raises InvalidPositionException."""
    inverted_hit = AMRHitHSP(None, None)
    # Replace each contig accessor with a mock returning a fixed value.
    stubbed_values = (
        ('get_genome_contig_id', "contig1"),
        ('get_genome_contig_start', 10),
        ('get_genome_contig_end', 1),
        ('get_genome_contig_strand', 'plus'),
    )
    for accessor_name, value in stubbed_values:
        setattr(inverted_hit, accessor_name, MagicMock(return_value=value))

    parts = BlastHitPartitions()

    with self.assertRaises(InvalidPositionException):
        parts.append(inverted_hit)
def testSinglePartitionIdenticalHitsMinusStrand(self):
    """The same minus-strand hit appended twice gives one partition holding both entries."""
    minus_hit = AMRHitHSP(None, None)
    # Replace each contig accessor with a mock returning a fixed value.
    stubbed_values = (
        ('get_genome_contig_id', "contig1"),
        ('get_genome_contig_start', 10),
        ('get_genome_contig_end', 1),
        ('get_genome_contig_strand', 'minus'),
    )
    for accessor_name, value in stubbed_values:
        setattr(minus_hit, accessor_name, MagicMock(return_value=value))

    parts = BlastHitPartitions()
    # The very same object is appended twice on purpose.
    for _ in range(2):
        parts.append(minus_hit)

    return_list = parts.get_hits_nonoverlapping_regions()

    self.assertEqual(1, len(return_list), "Should only be one partition")
    partition = return_list[0]
    self.assertEqual(2, len(partition), "Should be two hits")

    contig_ids = [entry.get_genome_contig_id() for entry in partition]
    self.assertEqual(['contig1', 'contig1'], contig_ids, "Should have correct contig names")

    starts = [entry.get_genome_contig_start() for entry in partition]
    self.assertEqual([10, 10], starts, "Should have correct contig starts")

    ends = [entry.get_genome_contig_end() for entry in partition]
    self.assertEqual([1, 1], ends, "Should have correct contig ends")