Exemple #1
0
 def test_find_multiple_single_base_deletion(self):
     # Two separate '*' marks in the sequence row each remove one reference
     # base; each deletion is expected to be reported anchored on the
     # reference base immediately before it.
     reference = ReferenceChromosome("TTAAAAAGAAAAT")
     sequence = Sequence(reference, "..*.....*....")
     observed = sequence.variants
     expected = {
         Variant(reference.chrom, 1, "TA", "T"),
         Variant(reference.chrom, 7, "GA", "G"),
     }
     self.assertEqual(observed, expected)
Exemple #2
0
 def test_should_find_multiple_snps(self):
     # Two letters in the sequence row that differ from the all-'A'
     # reference are expected to come back as two independent SNPs.
     reference = ReferenceChromosome("AAAAAAAAAAAAA")
     sequence = Sequence(reference, ".C.........T.")
     observed = sequence.variants
     expected = {
         Variant(reference.chrom, 1, "A", "C"),
         Variant(reference.chrom, 11, "A", "T"),
     }
     self.assertEqual(observed, expected)
Exemple #3
0
 def test_find_adjacent_insertion_and_snp(self):
     # The '*' in the reference row is a padding column: the 'C' above it
     # is expected as an insertion after position 0, and the 'G' in the
     # next column as a SNP at reference position 1.
     reference = ReferenceChromosome("T*ATAAAAAAAT")
     sequence = Sequence(reference, ".CG.........")
     observed = sequence.variants
     expected = {
         Variant(reference.chrom, 0, "T", "TC"),
         Variant(reference.chrom, 1, "A", "G"),
     }
     self.assertEqual(observed, expected)
Exemple #4
0
 def test_find_adjacent_snp_and_deletion(self):
     # A SNP directly followed by a deleted base: both variants are
     # expected at position 1, and the deletion's anchor is the reference
     # base 'T', not the SNP allele 'G'.
     reference = ReferenceChromosome("TTAAAAAAAAAT")
     sequence = Sequence(reference, ".G*.........")
     observed = sequence.variants
     expected = {
         Variant(reference.chrom, 1, "T", "G"),
         Variant(reference.chrom, 1, "TA", "T"),
     }
     self.assertEqual(observed, expected)
Exemple #5
0
 def test_find_multiple_variants(self):
     # One sequence row mixing a SNP, an insertion over a reference padding
     # column, a second SNP and a two-base deletion. Expected positions
     # are reference coordinates, i.e. the padding column is not counted.
     reference = ReferenceChromosome("TA*AAAGCTAACT")
     sequence = Sequence(reference, ".GC...T...**.")
     observed = sequence.variants
     expected = {
         Variant(reference.chrom, 1, "A", "G"),
         Variant(reference.chrom, 1, "A", "AC"),
         Variant(reference.chrom, 5, "G", "T"),
         Variant(reference.chrom, 8, "AAC", "A"),
     }
     self.assertEqual(observed, expected)
Exemple #6
0
    def __get_expected_calls_from_haplotypes(ascii_strings, reference):
        """Derive the expected genotype calls from two ASCII haplotypes.

        Each haplotype string is parsed against ``reference`` with
        ``Sequence`` and its ``variants`` are collected. A variant found on
        both haplotypes is mapped to ``GenotypeCall("1/1")``; a variant found
        on exactly one haplotype is mapped to ``GenotypeCall("0/1")``.

        Args:
            ascii_strings: The two haplotype strings; exactly two are
                required.
            reference: Reference object providing ``length_with_deletions()``
                and accepted by ``Sequence``.

        Returns:
            A dict mapping each variant to its ``GenotypeCall``.

        Raises:
            weCallException: If the number of haplotypes is not two, or if any
                haplotype's length differs from
                ``reference.length_with_deletions()``.
        """
        if len(ascii_strings) != 2:
            raise weCallException(
                "Expected calls have to be defined as a diploid.")

        # Computed once: the value does not depend on the haplotype checked.
        expected_length = reference.length_with_deletions()
        if not all(len(haplotype) == expected_length
                   for haplotype in ascii_strings):
            raise weCallException(
                "Ascii haplotypes have to be of the same length as the reference")

        vars_from_hap1 = Sequence(reference, ascii_strings[0]).variants
        vars_from_hap2 = Sequence(reference, ascii_strings[1]).variants

        calls = {}
        # Shared by both haplotypes -> homozygous alternate.
        for var in vars_from_hap1.intersection(vars_from_hap2):
            calls[var] = GenotypeCall("1/1")
        # Present on only one haplotype -> heterozygous.
        for var in vars_from_hap1.symmetric_difference(vars_from_hap2):
            calls[var] = GenotypeCall("0/1")

        return calls
Exemple #7
0
def build_annotated_pair(fwd, rev, n_fwd, n_rev, mapping_quality, insert_size,
                         read_id, read_flags, cigar_string, read_start,
                         read_mate_start):
    """Assemble a forward/reverse read pair annotated with coverage counts.

    ``fwd`` and ``rev`` each supply ``reference_string``, ``pos_from``,
    ``sequence_string`` and ``quality_string``. Both mates are built with the
    same ``cigar_string`` and the same read annotations (mapping quality,
    insert size, id, flags, start and mate start).

    Returns:
        A single-element list holding the ``ReadPairWithCoverage`` built from
        the two reads together with ``n_fwd`` and ``n_rev``.
    """
    reference_for_fwd = ReferenceChromosome(fwd.reference_string, fwd.pos_from)
    reference_for_rev = ReferenceChromosome(rev.reference_string, rev.pos_from)

    # Before parsing, every ',' is rewritten to '.' and letters are
    # upper-cased.
    fwd_bases = fwd.sequence_string.replace(",", ".").upper()
    sequence_for_fwd = Sequence(reference_for_fwd, fwd_bases, cigar_string)
    rev_bases = rev.sequence_string.replace(",", ".").upper()
    sequence_for_rev = Sequence(reference_for_rev, rev_bases, cigar_string)

    quality_for_fwd = SequenceQuality(fwd.quality_string)
    quality_for_rev = SequenceQuality(rev.quality_string)

    # Annotations that are identical for both mates of the pair.
    shared_annotations = (mapping_quality, insert_size, read_id, read_flags,
                          read_start, read_mate_start)
    fwd_read = ReadSequence(sequence_for_fwd, quality_for_fwd,
                            *shared_annotations)
    rev_read = ReadSequence(sequence_for_rev, quality_for_rev,
                            *shared_annotations)

    pair = ReadPairWithCoverage(fwd_read, rev_read, n_fwd, n_rev)
    return [pair]
Exemple #8
0
    def build_annotated_seq(self, n_fwd, n_rev, mapping_quality, insert_size,
                            read_id, read_flags, cigar_string, read_start,
                            read_mate_start):
        """Turn this raw sequence into a coverage-annotated read.

        The read is built from ``self.reference_string``, ``self.pos_from``,
        ``self.sequence_string`` and ``self.quality_string``. When ``n_fwd``
        is given, ``n_fwd``/``n_rev`` are used as the coverage counts;
        otherwise the counts are (0, 1) for a reverse sequence and (1, 0) for
        a forward sequence.

        Returns:
            A single-element list holding the ``ReadSequenceWithCoverage``.

        Raises:
            weCallException: If ``n_fwd`` is None and the sequence is neither
                reverse nor forward.
        """
        chromosome = ReferenceChromosome(self.reference_string, self.pos_from)
        # Before parsing, every ',' is rewritten to '.' and letters are
        # upper-cased.
        bases = self.sequence_string.replace(",", ".").upper()
        parsed = Sequence(chromosome, bases, cigar_string)
        base_qualities = SequenceQuality(self.quality_string)

        read_sequence = ReadSequence(parsed, base_qualities, mapping_quality,
                                     insert_size, read_id, read_flags,
                                     read_start, read_mate_start)

        # Explicit counts win; otherwise fall back to a single read on the
        # strand this raw sequence reports for itself.
        if n_fwd is not None:
            coverage = (n_fwd, n_rev)
        elif self.is_reverse_seq():
            coverage = (0, 1)
        elif self.is_forward_seq():
            coverage = (1, 0)
        else:
            raise weCallException(
                "Raw sequence: {} is neither forward or reverse".format(self))

        return [ReadSequenceWithCoverage(read_sequence, *coverage)]
Exemple #9
0
 def test_should_find_multi_base_insertion(self):
     # Two bases written above two reference padding columns are expected
     # to be merged into one insertion anchored on the preceding base.
     reference = ReferenceChromosome("CT**AAAAAAAAT")
     sequence = Sequence(reference, "..GC.........")
     observed = sequence.variants
     expected = {Variant(reference.chrom, 1, "T", "TGC")}
     self.assertEqual(observed, expected)
Exemple #10
0
 def test_finds_snp(self):
     # A single differing letter is expected to yield exactly one SNP.
     reference = ReferenceChromosome("AAAAAAAAAAAAA")
     sequence = Sequence(reference, ".C...........")
     observed = sequence.variants
     expected = {Variant(reference.chrom, 1, "A", "C")}
     self.assertEqual(observed, expected)
Exemple #11
0
 def test_should_not_find_deletion_on_left_edge(self):
     # A '*' in the very first column of the sequence row is expected to
     # produce no variant at all.
     ref = ReferenceChromosome("TAGCAAAAAAAT")
     seq = Sequence(ref, "*...........")
     self.assertEqual(len(seq.variants), 0)
Exemple #12
0
 def test_should_not_find_long_deletion_on_right_edge(self):
     # A run of '*' reaching the last column of the sequence row is
     # expected to produce no variant at all.
     reference = ReferenceChromosome("TTAGCAAAAAACT")
     sequence = Sequence(reference, "..........***")
     variant_count = len(sequence.variants)
     self.assertEqual(variant_count, 0)
Exemple #13
0
 def test_should_get_empty_cigar(self):
     # Empty reference and empty sequence: the cigar is expected to render
     # as the empty string.
     reference = ReferenceChromosome("")
     sequence = Sequence(reference, "")
     cigar_text = str(sequence.cigar)
     self.assertEqual(cigar_text, "")
Exemple #14
0
 def test_find_multi_base_deletion_with_deletion_in_reference(self):
     # The second '*' of the sequence row sits above a '*' in the reference
     # row, so only the single real base 'A' is expected to be deleted.
     reference = ReferenceChromosome("TTA*AAAAAAAAT")
     sequence = Sequence(reference, "..**.........")
     observed = sequence.variants
     expected = {Variant(reference.chrom, 1, "TA", "T")}
     self.assertEqual(observed, expected)
Exemple #15
0
 def test_finds_snp_at_the_ref_start(self):
     # A differing letter in the first column is expected as a SNP at
     # position 0.
     reference = ReferenceChromosome("CATAAAAAAAA")
     sequence = Sequence(reference, "T..........")
     observed = sequence.variants
     expected = {Variant(reference.chrom, 0, "C", "T")}
     self.assertEqual(observed, expected)
Exemple #16
0
 def test_finds_snp_at_the_ref_end(self):
     # A differing letter in the last column is expected as a SNP at the
     # final reference position (10).
     reference = ReferenceChromosome("CATAAAAAAAT")
     sequence = Sequence(reference, "..........C")
     observed = sequence.variants
     expected = {Variant(reference.chrom, 10, "T", "C")}
     self.assertEqual(observed, expected)
Exemple #17
0
 def test_finds_snp_after_asterix(self):
     # The column where both rows hold '*' is not counted as a reference
     # position, so the 'C' in string column 3 is expected as a SNP at
     # reference position 2.
     reference = ReferenceChromosome("T*CATAAAAAAAA")
     sequence = Sequence(reference, ".*.C.........")
     observed = sequence.variants
     expected = {Variant(reference.chrom, 2, "A", "C")}
     self.assertEqual(observed, expected)
Exemple #18
0
 def test_should_get_correct_cigar_for_dots(self):
     # Four '.' columns are expected to render as one 4-long match run.
     reference = ReferenceChromosome("CCAA")
     sequence = Sequence(reference, "....")
     cigar_text = str(sequence.cigar)
     self.assertEqual(cigar_text, "4M")
Exemple #19
0
 def test_should_get_correct_cigar_for_snp(self):
     # A SNP column is expected to count as 'M' like its '.' neighbours.
     reference = ReferenceChromosome("TTT")
     sequence = Sequence(reference, ".G.")
     cigar_text = str(sequence.cigar)
     self.assertEqual(cigar_text, "3M")
Exemple #20
0
 def test_should_get_correct_cigar_for_multiple_events(self):
     # Mixed row: SNP, deletion, match, two inserted bases split by a
     # column that is '*' in both rows, three matches, a two-base deletion
     # and a final SNP. The two inserted bases are expected to merge
     # into a single '2I'.
     reference = ReferenceChromosome("CCC***AAATTT")
     sequence = Sequence(reference, "A*.T*T...**C")
     cigar_text = str(sequence.cigar)
     self.assertEqual(cigar_text, "1M1D1M2I3M2D1M")
Exemple #21
0
 def test_should_get_correct_cigar_for_padding_and_insertion(self):
     # A base written above a reference padding column is expected to show
     # up as a one-base insertion between two matches.
     reference = ReferenceChromosome("T*T")
     sequence = Sequence(reference, ".C.")
     cigar_text = str(sequence.cigar)
     self.assertEqual(cigar_text, "1M1I1M")