Esempio n. 1
0
    def test_region_subject_to_query_00(self):
        """Map subject regions to the query when the subject has leading gaps.

        The subject row starts with three gap columns, so the expected
        alignment and query coordinates are the subject coordinates shifted
        by three.
        """
        pair = AlignedPair(("a", "ABCDEFG"), ("b", "---KLMN"))

        # Each case: subject bounds, then the expected alignment, subject
        # and query spans (checked in that order).
        cases = [
            # Subject KL lines up with query DE.
            ((0, 2), (3, 5), (0, 2), (3, 5)),
            # Empty region at the subject start: --- | KLMN and ABC | DEFG.
            ((0, 0), (3, 3), (0, 0), (3, 3)),
        ]
        for (start, end), in_alignment, in_subject, in_query in cases:
            region = AlignedRegion.from_subject(pair, start, end)
            self.assertEqual(region.in_alignment(), in_alignment)
            self.assertEqual(region.in_subject(), in_subject)
            self.assertEqual(region.in_query(), in_query)
Esempio n. 2
0
 def test_from_subject_region_crazy(self):
     """Check region indices for subject span 0-3 when both rows have gaps."""
     query_row = ("a", "-A-BC-EF---")
     subject_row = ("b", "--HI-JK-LMN")
     pair = AlignedPair(query_row, subject_row)

     region = AlignedRegion.from_subject(pair, 0, 3)

     # H, I and J occupy columns 2, 3 and 5 of the subject row; the expected
     # end index is one past J's column.
     self.assertEqual(region.start_idx, 2)
     self.assertEqual(region.end_idx, 6)
Esempio n. 3
0
 def test_from_subject_with_endgaps(self):
     """Check region indices when only the query row contains gaps."""
     query_row = ("a", "--ABC-EF---")
     subject_row = ("b", "HIJKLMNOPQR")
     pair = AlignedPair(query_row, subject_row)

     region = AlignedRegion.from_subject(pair, 1, 7)

     # The subject row has no gaps, so the expected indices equal the
     # subject coordinates that were passed in.
     self.assertEqual(region.start_idx, 1)
     self.assertEqual(region.end_idx, 7)
Esempio n. 4
0
 def test_from_subject_no_endgaps(self):
     """Check region indices for an alignment without any gaps."""
     query_row = ("a", "ABCDEF")
     subject_row = ("b", "HIJKLM")
     pair = AlignedPair(query_row, subject_row)

     region = AlignedRegion.from_subject(pair, 2, 5)

     # Neither row has gaps, so the expected indices equal the subject
     # coordinates that were passed in.
     self.assertEqual(region.start_idx, 2)
     self.assertEqual(region.end_idx, 5)
Esempio n. 5
0
    def find_in_seqs(self, seqs):
        """Yield ``(query_id, PrimerMatch)`` pairs found by alignment search.

        The sequences returned by ``seqs.get_matched_offset0()`` are written
        to a FASTA file and used as search subjects; the records returned by
        ``seqs.get_unmatched_recs()`` are searched against them with
        ``VsearchAligner``.  Every hit is extended with ``HitExtender``, and
        the subject's stored match coordinates are translated to query
        coordinates through ``AlignedRegion``.

        Nothing is yielded when ``seqs.all_matched()`` is true.
        """
        if seqs.all_matched():
            return

        # Working files, all tagged with this instance's suffix.
        subject_fp, query_fp, result_fp = [
            self._make_fp(template.format(self.suffix))
            for template in ("subject_{0}.fa", "query_{0}.fa", "query_{0}.txt")
        ]

        # Write the search subjects to disk before building the aligner.
        with open(subject_fp, "w") as subject_file:
            write_fasta(subject_file, seqs.get_matched_offset0())
        aligner = VsearchAligner(subject_fp)

        # min_pct_id is divided by 100 so it is passed on as a fraction.
        search_kwargs = dict(
            min_id=round(self.min_pct_id / 100, 2),
            top_hits_only=None,
        )
        # A thread count is only passed on when cores is positive.
        if self.cores > 0:
            search_kwargs["threads"] = self.cores
        hits = aligner.search(
            seqs.get_unmatched_recs(),
            input_fp=query_fp,
            output_fp=result_fp,
            **search_kwargs)

        # Extend each hit, then carry the subject's match over to the query.
        extender = HitExtender(
            seqs.get_unmatched_recs(), seqs.get_matched_offset0())
        for hit in hits:
            extended = extender.extend_hit(hit)
            known = seqs.matches[extended.subject_id]
            region = AlignedRegion.from_subject(
                extended, known.start, known.end)
            start_in_query, end_in_query = region.in_query()
            offset = region.query_offset()
            found = PrimerMatch(
                start_in_query, end_in_query, offset, "Alignment")
            yield extended.query_id, found
Esempio n. 6
0
    def test_region_subject_to_query_crazy_alignment(self):
        """Map subject regions to the query when both rows contain gaps."""
        pair = AlignedPair(("a", "-A-BC-EF---"), ("b", "--HI-JK-LMN"))

        # Each case: arguments after the alignment, then the expected
        # subject, alignment and query spans (checked in that order).
        cases = [
            # Subject HIJ -> columns HI-J -> query BC.
            ((0, 3), (0, 3), (2, 6), (1, 3)),
            # Subject IJK -> columns I-JK -> query BCE.
            ((1, 4), (1, 4), (3, 7), (1, 4)),
            # No bounds given: whole subject HIJKLMN -> columns HI-JK-LMN
            # -> query BCEF.
            ((), (0, 7), (2, 11), (1, 5)),
        ]
        for bounds, in_subject, in_alignment, in_query in cases:
            region = AlignedRegion.from_subject(pair, *bounds)
            self.assertEqual(region.in_subject(), in_subject)
            self.assertEqual(region.in_alignment(), in_alignment)
            self.assertEqual(region.in_query(), in_query)
Esempio n. 7
0
    def test_region_subject_to_query_with_endgaps(self):
        """Map subject regions to a query that has leading and trailing gaps."""
        pair = AlignedPair(("a", "--ABC-EF---"), ("b", "HIJKLMNOPQR"))

        # Each case: arguments after the alignment, then the expected
        # subject, alignment and query spans (checked in that order).  The
        # subject row has no gaps, so subject and alignment spans coincide.
        cases = [
            # Subject HIJ covers only query A.
            ((0, 3), (0, 3), (0, 3), (0, 1)),
            # Subject IJKLM covers query ABC.
            ((1, 6), (1, 6), (1, 6), (0, 3)),
            # No bounds given: whole subject covers query ABCEF.
            ((), (0, 11), (0, 11), (0, 5)),
        ]
        for bounds, in_subject, in_alignment, in_query in cases:
            region = AlignedRegion.from_subject(pair, *bounds)
            self.assertEqual(region.in_subject(), in_subject)
            self.assertEqual(region.in_alignment(), in_alignment)
            self.assertEqual(region.in_query(), in_query)
Esempio n. 8
0
    def test_region_subject_to_query_no_endgaps(self):
        """Check subject- and query-derived regions agree without gaps.

        For each pair of bounds, a region is built once from subject
        coordinates and once from query coordinates, and both are expected
        to report the same span in the alignment.
        """
        a = AlignedPair(
            ("a", "ABCDEF"),
            ("b", "HIJKLM"))
        # In an alignment with no gaps, the query sequence coordinates should
        # always match the subject sequence coordinates
        r = AlignedRegion.from_subject(a, 0, 3)
        self.assertEqual(r.in_alignment(), (0, 3))
        rq = AlignedRegion.from_query(a, 0, 3)
        # Assert on the query-derived region itself, not on ``r`` again.
        self.assertEqual(rq.in_alignment(), (0, 3))

        r = AlignedRegion.from_subject(a, 1, 5)
        self.assertEqual(r.in_alignment(), (1, 5))
        rq = AlignedRegion.from_query(a, 1, 5)
        self.assertEqual(rq.in_alignment(), (1, 5))

        # With no bounds given, both constructors should span the whole
        # alignment.
        r = AlignedRegion.from_subject(a)
        self.assertEqual(r.in_alignment(), (0, 6))
        rq = AlignedRegion.from_query(a)
        self.assertEqual(rq.in_alignment(), (0, 6))