Esempio n. 1
0
 def test_N_special_treatment(self):
     """
     'N' gets special handling when the consensus is computed:
     i) position 2 holds N and A, which differ, yet the column is consensus ("A")
     ii) position 0 holds N in both rows, which agree, yet the column is not consensus ("*")
     """
     sequences = ["NTN", "NTA"]
     consensus = PrgBuilder.get_consensus(make_alignment(sequences))
     self.assertEqual(consensus, "*TA")
Esempio n. 2
0
 def test_GivenUnorderedIds_SubalignmentStillInSequenceOrder(self):
     """
     Ids requested out of order ("s3" before "s1") must still produce a
     sub-alignment whose records follow the input alignment's order
     (rows 0 and 2 of the fixture alignment, in that order)
     """
     requested_ids = ["s3", "s1"]
     sub_alignment = PrgBuilder.get_sub_alignment_by_list_id(
         requested_ids, self.alignment
     )
     first_row, third_row = self.alignment[0], self.alignment[2]
     expected = MSA([first_row, third_row])
     self.assertTrue(msas_equal(expected, sub_alignment))
Esempio n. 3
0
 def test_get_subalignment_with_interval(self):
     """
     Requesting ids s2 and s3 together with the interval [0, 2] must give
     an alignment equal to the records "C--" (s2) and "AAT" (s3)
     """
     wanted_ids = ["s2", "s3"]
     interval = [0, 2]
     sub_alignment = PrgBuilder.get_sub_alignment_by_list_id(
         wanted_ids, self.alignment, interval
     )
     expected_records = [
         SeqRecord(Seq("C--"), id="s2"),
         SeqRecord(Seq("AAT"), id="s3"),
     ]
     self.assertTrue(msas_equal(MSA(expected_records), sub_alignment))
Esempio n. 4
0
 def test_ambiguous_alignment_skip_clustering(self):
     """
     The extra row "ATTTT--A" is just as valid a gapped alignment of the
     sequence "ATTAATTA" as "A--TTTTA" is.
     An alignment is ambiguous when more than one gapped row corresponds to
     the same ungapped sequence. Such alignments are not clustered, because
     clustering them can create ambiguous graphs (different paths spelling
     the same sequence).
     """
     added_seq = "ATTTT--A"
     ambiguous_rows = self.aligned_seqs + [added_seq]
     self.tested_params["alignment"] = make_alignment(ambiguous_rows)
     should_skip = PrgBuilder.skip_clustering(**self.tested_params)
     self.assertTrue(should_skip)
Esempio n. 5
0
 def build(self):
     """
     Produce the output prg according to `self.build_type`.

     - BuildType.PRG: the input file is copied to `self.out_fname` as is.
     - BuildType.MSA: a PrgBuilder is run on the input file and its `prg`
       is passed to `encode_and_write_prg`.
     - anything else: `self.sequence` is passed to `encode_and_write_prg`.
     """
     if self.build_type is BuildType.PRG:
         log.debug(f"Copying already-build prg {self._in_fname}")
         shutil.copy(self._in_fname, self.out_fname)
         return

     if self.build_type is BuildType.MSA:
         log.debug(f"Building variant prg from MSA {self._in_fname}")
         variant_builder = PrgBuilder(self._in_fname)
         self.encode_and_write_prg(variant_builder.prg)
         return

     # Neither a ready-made prg nor an MSA: write the plain sequence.
     log.debug(f"Building invariant prg for region {self.start}-{self.end}")
     self.encode_and_write_prg(self.sequence)
Esempio n. 6
0
 def test_all_gap_nonmatch(self):
     """
     Columns where every row holds a gap are reported as non-match ("*"),
     even though the rows agree with each other
     """
     gapped_rows = ["A--A", "A--A"]
     consensus = PrgBuilder.get_consensus(make_alignment(gapped_rows))
     self.assertEqual(consensus, "A**A")
Esempio n. 7
0
 def test_IUPACAmbiguous_nonmatch(self):
     """
     Ambiguity codes give a non-match ("*") column, both when the rows
     agree (R/R at position 0) and when they differ (Y/T at position 1)
     """
     rows = ["RYA", "RTA"]
     consensus = PrgBuilder.get_consensus(make_alignment(rows))
     self.assertEqual(consensus, "**A")
Esempio n. 8
0
 def test_mixed_match_nonmatch(self):
     """
     Agreeing columns keep their base and disagreeing columns become "*"
     when both kinds are interleaved in one alignment
     """
     rows = ["AAGTA", "CATTA"]
     consensus = PrgBuilder.get_consensus(make_alignment(rows))
     self.assertEqual(consensus, "*A*TA")
Esempio n. 9
0
 def test_nested_snps_under_del(self):
     """
     Building from nested_snps_deletion.fa with min_match_length=1 must
     give the expected prg string with nested variant sites
     """
     fasta_path = data_dir / "nested_snps_deletion.fa"
     builder = PrgBuilder(fasta_path, min_match_length=1)
     expected_prg = "A 5 AA 7 C 8 T 7 AAAA 9 T 10 G 9 AA 6 A 5 AA"
     self.assertEqual(builder.prg, expected_prg)
Esempio n. 10
0
 def test_nested_snp_backgrounds(self):
     """
     Building from nested_snps_seq_backgrounds.fa with min_match_length=3
     must give the expected prg string with nested variant sites
     """
     fasta_path = data_dir / "nested_snps_seq_backgrounds.fa"
     builder = PrgBuilder(fasta_path, min_match_length=3)
     expected_prg = " 5 AAAA 7 T 8 C 7 AAAAAA 6 CCCC 9 T 10 G 9 CCCCCC 5 "
     self.assertEqual(builder.prg, expected_prg)
Esempio n. 11
0
 def test_too_few_seqs_skip_clustering(self):
     """
     Clustering is skipped once the alignment is cut down to its first
     record only
     """
     params = self.tested_params
     single_record_alignment = params["alignment"][0:1]
     params["alignment"] = single_record_alignment
     self.assertTrue(PrgBuilder.skip_clustering(**params))
Esempio n. 12
0
 def test_small_interval_skip_clustering(self):
     """
     Clustering is skipped once the interval's `stop` is lowered to 1
     """
     interval = self.tested_params["interval"]
     interval.stop = 1
     should_skip = PrgBuilder.skip_clustering(**self.tested_params)
     self.assertTrue(should_skip)
Esempio n. 13
0
 def test_max_nesting_reached_skip_clustering(self):
     """
     Clustering is skipped once `nesting_level` is set to 2
     (presumably the maximum configured in the fixture params; confirm in setUp)
     """
     params = self.tested_params
     params["nesting_level"] = 2
     should_skip = PrgBuilder.skip_clustering(**params)
     self.assertTrue(should_skip)
Esempio n. 14
0
 def test_original_params_no_skip_clustering(self):
     """
     With the fixture parameters left untouched, clustering is not skipped
     """
     should_skip = PrgBuilder.skip_clustering(**self.tested_params)
     self.assertFalse(should_skip)
Esempio n. 15
0
 def test_GivenOrderedIds_SubalignmentInSequenceOrder(self):
     """
     Ids requested in order ("s1" then "s3") must produce a sub-alignment
     equal to rows 0 and 2 of the fixture alignment, in that order
     """
     requested_ids = ["s1", "s3"]
     sub_alignment = PrgBuilder.get_sub_alignment_by_list_id(
         requested_ids, self.alignment
     )
     first_row, third_row = self.alignment[0], self.alignment[2]
     expected = MSA([first_row, third_row])
     self.assertTrue(msas_equal(expected, sub_alignment))
Esempio n. 16
0
 def test_get_subalignment_with_interval(self):
     """
     Requesting ids s2 and s3 together with the interval [0, 2] must give
     an alignment equal to the rows "C--" (s2) and "AAT" (s3)
     """
     wanted_ids = ["s2", "s3"]
     interval = [0, 2]
     sub_alignment = PrgBuilder.get_sub_alignment_by_list_id(
         wanted_ids, self.alignment, interval
     )
     expected = make_alignment(["C--", "AAT"], ["s2", "s3"])
     self.assertTrue(msas_equal(expected, sub_alignment))
Esempio n. 17
0
    def test_answers_non_nested(self):
        """
        Build a prg from each fixture file below (default PrgBuilder
        parameters) and compare it with the expected prg string.

        Every fixture runs inside its own subTest, labelled with the file
        name. That way one mismatch does not hide the fixtures listed after
        it, and the failing file is identified even though several fixtures
        share the same expected string.
        """
        # (fixture file name in data_dir, expected prg string)
        expected_prgs = (
            ("match.fa", "ACGTGTTTTGTAACTGTGCCACACTCTCGAGACTGCATATGTGTC"),
            ("nonmatch.fa", " 5 AAACGTGGTT 6 CCCCCCCCCC 5 "),
            ("match.nonmatch.fa", "AAACG 5 TGGTT 6 CCCCC 5 "),
            ("nonmatch.match.fa", " 5 AAACGT 6 CCCCCC 5 GGTT"),
            ("match.nonmatch.match.fa", "AAACG 5 T 6 C 5 GGTT"),
            ("shortmatch.nonmatch.match.fa", " 5 AAACGT 6 ATTTTC 5 GGTT"),
            ("match.nonmatch.shortmatch.fa", "AAAC 5 GTGGTT 6 CCCCCT 5 "),
            ("match.staggereddash.fa", "AAACGTGGTT"),
            ("contains_n.fa", "AAACG 5 T 6 C 5 GGTT"),
            ("contains_RYKMSW.fa", "AAACG 5 T 6 C 5 GGTT"),
            ("contains_n_and_RYKMSW.fa", "AAACG 5 T 6 C 5 GGTT"),
            ("contains_n_and_RYKMSW_no_variants.fa", "AAACGTGGTT"),
        )

        for fname, expected_prg in expected_prgs:
            with self.subTest(infile=fname):
                aseq = PrgBuilder(data_dir / fname)
                self.assertEqual(aseq.prg, expected_prg)