Example #1
0
 def test_should_interpret_trailing_whitespace_to_override_pos_to_when_seq_has_insertion(
         self):
     """Expect a read of length 2 from ``".C  "`` laid over ``"C*GA"``.

     The sequence string ends in two spaces, and its ``C`` sits where the
     reference string has a ``*``.
     """
     reference = ReferenceChromosome("C*GA")
     reads = []
     for builder in sequence_builder(reference, ".C  "):
         reads.extend(builder.build_reads(0, {}))
     first_read = reads[0]
     self.assertEqual(first_read.rlen, 2)
Example #2
0
 def test_should_build_with_custom_quality_with_ins(self):
     """Check the read's qualities when a quality string is supplied.

     ``"..CC."`` over ``"AA**A"`` is built with ``quality_string="31220"``;
     the first read's ``qual`` must equal the ``ascii_codes`` entries for
     3, 1, 2 (twice) and 0, concatenated in that order.
     """
     reference = ReferenceChromosome("AA**A")
     builders = sequence_builder(reference, "..CC.", quality_string="31220")
     reads = [
         read
         for builder in builders
         for read in builder.build_reads(0, {})
     ]
     actual_qual = reads[0].qual
     codes = self.ascii_codes
     expected_qual = codes["3"] + codes["1"] + codes["2"] * 2 + codes["0"]
     self.assertEqual(actual_qual, expected_qual)
Example #3
0
 def test_should_build_correct_sequence_with_insertion_at_the_end(self):
     """Expect ``"CCCTT"`` at position 0 for ``"...TT"`` over ``"CCC**"``.

     The reference string ends in two ``*`` characters, matched by ``TT``
     in the sequence string; the read length must be 5.
     """
     reference = ReferenceChromosome("CCC**")
     reads = []
     for builder in sequence_builder(reference, "...TT"):
         reads.extend(builder.build_reads(0, {}))
     first_read = reads[0]
     self.assertEqual(first_read.pos, 0)
     self.assertEqual(first_read.rlen, 5)
     self.assertEqual(first_read.seq, "CCCTT")
Example #4
0
 def test_should_interpret_trailing_whitespace_to_override_positions_for_complex_ref_and_seq(
         self):
     """Expect a read at position 0 with length 3.

     The input is ``".**.C   "`` over ``"ACCC*G*A"``: the sequence string
     carries ``*`` characters, a base where the reference has ``*``, and
     three trailing spaces.
     """
     reference = ReferenceChromosome("ACCC*G*A")
     builders = sequence_builder(reference, ".**.C   ")
     reads = [
         read
         for builder in builders
         for read in builder.build_reads(0, {})
     ]
     first_read = reads[0]
     self.assertEqual(first_read.pos, 0)
     self.assertEqual(first_read.rlen, 3)
Example #5
0
 def test_should_interpret_trailing_whitespace_to_override_pos_to(self):
     """Expect exactly one read, ``"CC"`` at position 0.

     The input is ``".C  "`` over ``"CATG"``; the sequence string ends in
     two spaces.
     """
     reference = ReferenceChromosome("CATG")
     reads = []
     for builder in sequence_builder(reference, ".C  "):
         reads.extend(builder.build_reads(0, {}))
     self.assertEqual(len(reads), 1)
     only_read = reads[0]
     self.assertEqual(only_read.pos, 0)
     self.assertEqual(only_read.seq, "CC")
Example #6
0
 def test_should_build_with_custom_quality_with_del(self):
     """Check the read's qualities when the sequence string has a ``*``.

     ``"..*.."`` over ``"AAAAA"`` is built with ``quality_string="31 00"``
     (a blank under the ``*``). After sorting, the first read's ``qual``
     must equal the ``ascii_codes`` entries for 3, 1, 0 and 0.
     """
     reference = ReferenceChromosome("AAAAA")
     builders = sequence_builder(reference, "..*..", quality_string="31 00")
     reads = [
         read
         for builder in builders
         for read in builder.build_reads(0, {})
     ]
     # Order the reads deterministically before picking the first one.
     reads = sorted(
         reads,
         key=lambda r: (r.pos, r.seq, r.qual, r.cigarstring, r.mapq))
     actual_qual = reads[0].qual
     codes = self.ascii_codes
     expected_qual = codes["3"] + codes["1"] + codes["0"] * 2
     self.assertEqual(actual_qual, expected_qual)
Example #7
0
 def test_should_build_correct_sequence_without_any_whitespace(self):
     """Expect one read, ``"CCT"`` of length 3 at position 0.

     The input is ``".*.T"`` over ``"C*CC"``; neither string contains a
     space.
     """
     reference = ReferenceChromosome("C*CC")
     reads = []
     for builder in sequence_builder(reference, ".*.T"):
         reads.extend(builder.build_reads(0, {}))
     self.assertEqual(len(reads), 1)
     only_read = reads[0]
     self.assertEqual(only_read.pos, 0)
     self.assertEqual(only_read.rlen, 3)
     self.assertEqual(only_read.seq, "CCT")
Example #8
0
 def test_should_interpret_leading_whitespace_to_override_pos_from_when_ref_has_deletion(
         self):
     """Expect one read, ``"CG"`` at position 1.

     The input is ``"  C."`` over ``"C*TG"``: the sequence string starts
     with two spaces and the reference string contains a ``*``.
     """
     reference = ReferenceChromosome("C*TG")
     builders = sequence_builder(reference, "  C.")
     reads = [
         read
         for builder in builders
         for read in builder.build_reads(0, {})
     ]
     self.assertEqual(len(reads), 1)
     only_read = reads[0]
     self.assertEqual(only_read.pos, 1)
     self.assertEqual(only_read.seq, "CG")
Example #9
0
 def test_should_build_two_complex_seqs_defined_on_single_line(self):
     """Expect two reads from one sequence string with a gap in the middle.

     ``" .G.  ,c,*,  "`` over ``"AA*CC*TGTAAGG"`` must yield, after
     sorting, ``"AGC"`` at position 1 followed by ``"TCTA"`` at position 4.
     """
     reference = ReferenceChromosome("AA*CC*TGTAAGG")
     reads = []
     for builder in sequence_builder(reference, " .G.  ,c,*,  "):
         reads.extend(builder.build_reads(0, {}))
     # Sort so the two reads can be checked in a fixed order.
     reads = sorted(
         reads,
         key=lambda r: (r.pos, r.seq, r.qual, r.cigarstring, r.mapq))
     self.assertEqual(len(reads), 2)
     first_read = reads[0]
     self.assertEqual(first_read.pos, 1)
     self.assertEqual(first_read.seq, "AGC")
     second_read = reads[1]
     self.assertEqual(second_read.pos, 4)
     self.assertEqual(second_read.seq, "TCTA")
Example #10
0
 def test_should_build_with_custom_quality_and_sequence_shorter_than_reference(
         self):
     """Check qualities for a read covering only part of the reference.

     ``"  ..*..     "`` over twelve ``A`` bases is built with
     ``quality_string="  31 0      "``. The first read's ``qual`` must be
     the ``ascii_codes`` entries for 3, 1 and 0 followed by one
     ``default_qual``.
     """
     reference = ReferenceChromosome("AAAAAAAAAAAA")
     seq_string = "  ..*..     "
     qual_string = "  31 0      "
     builders = sequence_builder(
         reference, seq_string, quality_string=qual_string)
     reads = [
         read
         for builder in builders
         for read in builder.build_reads(0, {})
     ]
     actual_qual = reads[0].qual
     codes = self.ascii_codes
     expected_qual = (
         codes["3"] + codes["1"] + codes["0"] + self.default_qual)
     self.assertEqual(actual_qual, expected_qual)
Example #11
0
 def add_sequence(self,
                  seq_string,
                  quality_string=None,
                  n_fwd=None,
                  n_rev=None,
                  mapping_quality=HIGH_QUALITY,
                  insert_size=None,
                  read_id=None,
                  read_flags=None,
                  cigar_string=None,
                  read_start=None,
                  read_mate_start=None):
     """Build sequence builders for ``seq_string`` and queue them here.

     All arguments are forwarded positionally, in declaration order, to
     ``sequence_builder`` after ``self.reference``. Whatever it returns
     is appended to ``self._read_sequences_with_coverage``.

     Returns:
         ``self``, so that calls can be chained.
     """
     queued = self._read_sequences_with_coverage
     new_builders = sequence_builder(
         self.reference, seq_string, quality_string, n_fwd, n_rev,
         mapping_quality, insert_size, read_id, read_flags, cigar_string,
         read_start, read_mate_start)
     queued.extend(new_builders)
     return self
Example #12
0
 def test_should_translate_reverse_seq_into_correct_annotations(self):
     """Expect ``n_fwd == 0`` and ``n_rev == 1`` for a comma/lowercase string.

     The input is ``",,c,"`` over ``"CCTG"``; only the first builder
     returned by ``sequence_builder`` is inspected.
     """
     reference = ReferenceChromosome("CCTG")
     builders = sequence_builder(reference, ",,c,")
     first_builder = builders[0]
     self.assertEqual(first_builder.n_fwd, 0)
     self.assertEqual(first_builder.n_rev, 1)
Example #13
0
 def test_should_raise_when_assigning_qual_to_deletion(self):
     """Expect ``weCallException`` when a quality sits under a ``*``.

     The quality string ``" 1  "`` places ``1`` at the same index as the
     ``*`` in ``".*.."``.
     """
     expected_message = "Cannot assign base quality to a deleted base."
     with self.assertRaisesRegex(weCallException, expected_message):
         reference = ReferenceChromosome("AAAA")
         sequence_builder(reference, ".*..", " 1  ")
Example #14
0
 def test_should_translate_reverse_seq_into_correct_sequence(self):
     """Expect the bases ``"CCCGT"`` from a comma/lowercase sequence string.

     The input is ``"   ,,c,*,  "`` over ``"AAACCTG*TAA"``.
     """
     reference = ReferenceChromosome("AAACCTG*TAA")
     reads = []
     for builder in sequence_builder(reference, "   ,,c,*,  "):
         reads.extend(builder.build_reads(0, {}))
     first_read = reads[0]
     self.assertEqual(first_read.seq, "CCCGT")
Example #15
0
 def test_should_raise_for_invalid_char_in_seq(self):
     """Expect ``weCallException`` for the ``&`` in ``"..&.."``."""
     expected_pattern = "Illegal character in sequence .*'"
     with self.assertRaisesRegex(weCallException, expected_pattern):
         reference = ReferenceChromosome("TAAAA")
         sequence_builder(reference, "..&..")
Example #16
0
 def test_should_build_with_default_quality_for_None(self):
     """Expect ``default_qual`` for every base when no quality is passed.

     ``"....."`` over ``"AAAAA"`` is built without a quality string; the
     first read's ``qual`` must equal ``self.default_qual * 5``.
     """
     reference = ReferenceChromosome("AAAAA")
     builders = sequence_builder(reference, ".....")
     reads = [
         read
         for builder in builders
         for read in builder.build_reads(0, {})
     ]
     actual_qual = reads[0].qual
     expected_qual = self.default_qual * 5
     self.assertEqual(actual_qual, expected_qual)
Example #17
0
 def test_should_raise_when_quality_string_too_long_due_to_insertions(self):
     """Expect ``weCallException`` on a quality/reference length mismatch.

     The quality string ``"123"`` has three characters; the reference
     string ``"TA**A"`` has five.
     """
     # NOTE(review): the quality string used here is shorter than the
     # reference string although the test name says "too long" - confirm
     # the intended input.
     expected_message = (
         "Quality string has to be of the same length as reference.")
     with self.assertRaisesRegex(weCallException, expected_message):
         reference = ReferenceChromosome("TA**A")
         sequence_builder(reference, "..TT.", "123")
Example #18
0
 def test_should_raise_when_quality_assigned_to_gap(self):
     """Expect ``weCallException`` when a quality sits under a space.

     In ``"12  34 "`` the ``3`` is at the same index as the second space
     of ``"...  .."``.
     """
     expected_message = "Cannot assign base quality inside a gap."
     with self.assertRaisesRegex(weCallException, expected_message):
         reference = ReferenceChromosome("TAAAA*A")
         sequence_builder(reference, "...  ..", "12  34 ")
Example #19
0
 def test_should_raise_when_quality_string_too_short_multisequence(self):
     """Expect ``weCallException`` when the quality string is too short.

     The quality string ``"12   4"`` has six characters; the reference
     string ``"TAAAA*A"`` and sequence string ``"...  .."`` have seven.
     """
     expected_message = (
         "Quality string has to be of the same length as reference.")
     with self.assertRaisesRegex(weCallException, expected_message):
         reference = ReferenceChromosome("TAAAA*A")
         sequence_builder(reference, "...  ..", "12   4")
Example #20
0
 def test_should_raise_for_dot_in_reverse_seq(self):
     """Expect ``weCallException`` when ``.`` appears among commas.

     The sequence string ``",,c.,"`` mixes one ``.`` into an otherwise
     comma/lowercase string.
     """
     expected_pattern = "Illegal character in sequence .*"
     with self.assertRaisesRegex(weCallException, expected_pattern):
         reference = ReferenceChromosome("TAAAA")
         sequence_builder(reference, ",,c.,")