Ejemplo n.º 1
0
    def _handle_blast_hit(self, in_file, database_name, blast_file, results,
                          hit_seq_records):
        """Parse a BLAST table and collect the rows/records of selected hits.

        The table is read with ``pandas.read_table`` (no header row; column
        names come from ``BlastHandler.BLAST_COLUMNS``). A ``plength`` column
        is derived as ``length / qlen * 100``, rows are filtered on the
        identity and length thresholds and on the excluded query ids, and the
        remaining rows are sorted by ``self.BLAST_SORT_COLUMNS`` before being
        turned into hits and grouped by ``BlastHitPartitions``.

        Args:
            in_file: Forwarded unchanged to ``self._create_hit``.
            database_name: Forwarded to ``self._create_hit`` and
                ``self._get_result_rows``.
            blast_file: Input handed to ``pandas.read_table``.
            results: List extended in place with the rows returned by
                ``self._get_result_rows`` for each selected hit.
            hit_seq_records: List appended in place with
                ``hit.get_seq_record()`` for each selected hit.

        Returns:
            None. Output is delivered through ``results`` and
            ``hit_seq_records``.
        """
        blast_table = pd.read_table(
            blast_file,
            header=None,
            names=BlastHandler.BLAST_COLUMNS,
            index_col=False).astype(dtype={
                # Builtin ``str`` rather than ``np.unicode_``: that alias was
                # removed in NumPy 2.0 and pandas treats both identically.
                'qseqid': str,
                'sseqid': str
            })

        # Alignment length expressed as a percentage of the query length.
        blast_table['plength'] = (blast_table.length /
                                  blast_table.qlen) * 100.0

        passes_filters = (
            (blast_table.pident >= self._pid_threshold)
            & (blast_table.plength >= self._plength_threshold)
            & ~blast_table.qseqid.isin(self._genes_to_exclude))

        # Rebind instead of sorting in place: an in-place sort on a filtered
        # frame is what triggers pandas' SettingWithCopyWarning.
        blast_table = blast_table[passes_filters].sort_values(
            by=self.BLAST_SORT_COLUMNS)

        partitions = BlastHitPartitions()
        for _, blast_record in blast_table.iterrows():
            partitions.append(
                self._create_hit(in_file, database_name, blast_record))

        for hits_non_overlapping in (
                partitions.get_hits_nonoverlapping_regions()):
            for hit in self._select_hits_to_include(hits_non_overlapping):
                blast_results = self._get_result_rows(hit, database_name)
                # A hit with no result rows contributes neither rows nor a
                # sequence record.
                if blast_results is not None:
                    logger.debug("record = %s", blast_results)
                    results.extend(blast_results)
                    hit_seq_records.append(hit.get_seq_record())
Ejemplo n.º 2
0
    def testTwoPartitionsDifferentContigNames(self):
        """Hits on two different contigs must end up in two partitions."""

        def make_hit(contig_name):
            # Both hits share coordinates and strand; only the contig differs.
            hit = AMRHitHSP(None, None)
            hit.get_genome_contig_id = MagicMock(return_value=contig_name)
            hit.get_genome_contig_start = MagicMock(return_value=1)
            hit.get_genome_contig_end = MagicMock(return_value=10)
            hit.get_genome_contig_strand = MagicMock(return_value='plus')
            return hit

        first_hit = make_hit("contig1")
        second_hit = make_hit("contig2")

        partitions = BlastHitPartitions()
        partitions.append(first_hit)
        partitions.append(second_hit)

        regions = partitions.get_hits_nonoverlapping_regions()
        self.assertEqual(2, len(regions), "Should be two partitions")

        # One entry per expected partition: contig name and the size message.
        expectations = (
            ('contig1', "Partition 1 should have 1 hit"),
            ('contig2', "Partition 2 should have 1 hit"),
        )
        for position, (contig_name, size_message) in enumerate(expectations):
            partition = regions[position]
            self.assertEqual(1, len(partition), size_message)

            contig_names = [h.get_genome_contig_id() for h in partition]
            self.assertEqual([contig_name], contig_names,
                             "Should have correct contig names")

            contig_starts = [h.get_genome_contig_start() for h in partition]
            self.assertEqual([1], contig_starts,
                             "Should have correct contig starts")

            contig_ends = [h.get_genome_contig_end() for h in partition]
            self.assertEqual([10], contig_ends,
                             "Should have correct contig ends")
Ejemplo n.º 3
0
    def testSinglePartitionHit2EdgeWithinHit1Greater(self):
        """Overlapping same-contig hits (5-11 and 10-15) share one partition."""

        def make_hit(start, end):
            # Both hits sit on the plus strand of "contig1".
            hit = AMRHitHSP(None, None)
            hit.get_genome_contig_id = MagicMock(return_value="contig1")
            hit.get_genome_contig_start = MagicMock(return_value=start)
            hit.get_genome_contig_end = MagicMock(return_value=end)
            hit.get_genome_contig_strand = MagicMock(return_value='plus')
            return hit

        leading_hit = make_hit(5, 11)
        trailing_hit = make_hit(10, 15)

        partitions = BlastHitPartitions()
        partitions.append(leading_hit)
        partitions.append(trailing_hit)

        regions = partitions.get_hits_nonoverlapping_regions()
        self.assertEqual(1, len(regions), "Should only be one partition")

        merged = regions[0]
        self.assertEqual(2, len(merged), "Should be two hits")

        contig_names = [h.get_genome_contig_id() for h in merged]
        self.assertEqual(['contig1', 'contig1'], contig_names,
                         "Should have correct contig names")

        contig_starts = [h.get_genome_contig_start() for h in merged]
        self.assertEqual([5, 10], contig_starts,
                         "Should have correct contig starts")

        contig_ends = [h.get_genome_contig_end() for h in merged]
        self.assertEqual([11, 15], contig_ends,
                         "Should have correct contig ends")
Ejemplo n.º 4
0
    def testSinglePartitionMinus(self):
        """A lone minus-strand hit (start 10, end 1) forms one partition."""
        minus_hit = AMRHitHSP(None, None)
        # Stub every accessor with its canned return value.
        stubbed_values = (
            ('get_genome_contig_id', "contig1"),
            ('get_genome_contig_start', 10),
            ('get_genome_contig_end', 1),
            ('get_genome_contig_strand', 'minus'),
        )
        for accessor, canned in stubbed_values:
            setattr(minus_hit, accessor, MagicMock(return_value=canned))

        partitions = BlastHitPartitions()
        partitions.append(minus_hit)

        regions = partitions.get_hits_nonoverlapping_regions()
        self.assertEqual(1, len(regions), "Should only be one partition")
        self.assertEqual(1, len(regions[0]), "Should only be one hit")

        stored_hit = regions[0][0]
        self.assertEqual('contig1', stored_hit.get_genome_contig_id(),
                         "Should have correct contig name")
        self.assertEqual(10, stored_hit.get_genome_contig_start(),
                         "Should have correct contig start")
        self.assertEqual(1, stored_hit.get_genome_contig_end(),
                         "Should have correct contig end")
Ejemplo n.º 5
0
    def testSinglePartitionPlusFailMinusCoords(self):
        """A plus-strand hit with start 10 and end 1 is rejected on append."""
        inverted_hit = AMRHitHSP(None, None)
        # Coordinates run backwards (10 -> 1) while the strand says 'plus'.
        stubbed_values = (
            ('get_genome_contig_id', "contig1"),
            ('get_genome_contig_start', 10),
            ('get_genome_contig_end', 1),
            ('get_genome_contig_strand', 'plus'),
        )
        for accessor, canned in stubbed_values:
            setattr(inverted_hit, accessor, MagicMock(return_value=canned))

        partitions = BlastHitPartitions()

        with self.assertRaises(InvalidPositionException):
            partitions.append(inverted_hit)
Ejemplo n.º 6
0
    def testSinglePartitionIdenticalHitsMinusStrand(self):
        """Appending the same minus-strand hit twice gives one partition."""
        repeated_hit = AMRHitHSP(None, None)
        # Stub every accessor with its canned return value.
        stubbed_values = (
            ('get_genome_contig_id', "contig1"),
            ('get_genome_contig_start', 10),
            ('get_genome_contig_end', 1),
            ('get_genome_contig_strand', 'minus'),
        )
        for accessor, canned in stubbed_values:
            setattr(repeated_hit, accessor, MagicMock(return_value=canned))

        partitions = BlastHitPartitions()
        # The very same object is appended twice on purpose.
        for _ in range(2):
            partitions.append(repeated_hit)

        regions = partitions.get_hits_nonoverlapping_regions()
        self.assertEqual(1, len(regions), "Should only be one partition")

        grouped = regions[0]
        self.assertEqual(2, len(grouped), "Should be two hits")

        contig_names = [h.get_genome_contig_id() for h in grouped]
        self.assertEqual(['contig1', 'contig1'], contig_names,
                         "Should have correct contig names")

        contig_starts = [h.get_genome_contig_start() for h in grouped]
        self.assertEqual([10, 10], contig_starts,
                         "Should have correct contig starts")

        contig_ends = [h.get_genome_contig_end() for h in grouped]
        self.assertEqual([1, 1], contig_ends,
                         "Should have correct contig ends")