예제 #1
0
def test_init_ends_with_skip_both_before_and_after_clv():
    """Reference skips sit on both sides of the cleavage site.

             AA
           GT┘       <-bridge read
       G--AGT-GC     <-bridge contig
       0  123 456    <-contig coord
          cc^   ^ice
    ...GACAGTTGC...  <-genome
       5678901234    <-genome coord
            1   |
          rc^   ^ire

    Legend: cc = ctg_clv, rc = ref_clv; ice / ire mark the contig /
    reference end coordinates asserted below.
    """
    contig = 'GAGTGC'
    cigar = (
        (S.BAM_CMATCH, 1), (S.BAM_CREF_SKIP, 2),
        (S.BAM_CMATCH, 3), (S.BAM_CREF_SKIP, 1),
        (S.BAM_CMATCH, 2),
    )
    clv_on_ref, clv_on_ctg = 10, 3

    ref_end = init_ref_end(clv_on_ref, cigar, clv_on_ctg, contig)
    assert ref_end == 14

    ctg_end = init_ctg_end(contig)
    assert ctg_end == 6
예제 #2
0
def test_init_ends_with_deletion():
    """The contig carries a 2-base deletion before the cleavage site.

               AA
             CG┘      <-bridge read
       GAC__TCGTC     <-bridge contig
       012  345678    <-contig coord
          | ||x
          | |^cc ^ice
    ...GACGGTCCTC...  <-genome
       56789012345    <-genome coord
            1|   |
             ^rc ^rce

    Legend: cc = ctg_clv, rc = ref_clv; ice / rce mark the contig /
    reference end coordinates asserted below.
    """
    contig = 'GACTCGTC'
    cigar = ((S.BAM_CMATCH, 3), (S.BAM_CDEL, 2), (S.BAM_CMATCH, 5))
    clv_on_ref, clv_on_ctg = 11, 4

    ref_end = init_ref_end(clv_on_ref, cigar, clv_on_ctg, contig)
    assert ref_end == 15

    ctg_end = init_ctg_end(contig)
    assert ctg_end == 8
예제 #3
0
def test_link_init_ends():
    """A link contig aligned as one uninterrupted match.

          TT...AA
       ATCGAC    <-link contig
       0123456   <-contig coord
            ^ctg_clv
    ...7890123... <-genome coord
          1 ^ref_clv
    """
    contig = 'ATCGAC'
    full_match = (S.BAM_CMATCH, 6)
    cigar = (full_match,)
    clv_on_ref, clv_on_ctg = 12, 5

    ref_end = init_ref_end(clv_on_ref, cigar, clv_on_ctg, contig)
    assert ref_end == 13

    ctg_end = init_ctg_end(contig)
    assert ctg_end == 6
예제 #4
0
def test_init_ends():
    """Baseline case: a bridge contig aligned as one uninterrupted match.

             AA
           GT┘       <-bridge read
       GACGGTTGC     <-bridge contig
       0123456789    <-contig coord
          cc^   ^ice
    ...GACGGTTGC...  <-genome
       5678901234    <-genome coord
            1   |
          rc^   ^ire

    Legend: cc = ctg_clv, rc = ref_clv; ice / ire mark the contig /
    reference end coordinates asserted below.
    """
    contig = 'GACGGTTGC'
    full_match = (S.BAM_CMATCH, 9)
    cigar = (full_match,)
    clv_on_ref, clv_on_ctg = 10, 5

    ref_end = init_ref_end(clv_on_ref, cigar, clv_on_ctg, contig)
    assert ref_end == 14

    ctg_end = init_ctg_end(contig)
    assert ctg_end == 9
예제 #5
0
def test_bridge_init_ends_with_hardclip_before_clv():
    r"""The contig is hard-clipped upstream of the cleavage site.

    The docstring is raw because the hardclip mask row is drawn with
    backslashes; in a normal string literal they would be parsed as
    (partly invalid) escape sequences.

              AA
            TC┘|      <-bridge read
       CGCATTCGT      <-bridge contig
       \\\            <-hardclip mask
       0123456789     <-contig coord
           cc^  ^ice
    ...CGCATTCGXX...  <-genome
       2345678901     <-genome coord
             | 1|
           rc^  ^ire

    Legend: cc = ctg_clv, rc = ref_clv; ice / ire mark the contig /
    reference end coordinates asserted below.
    """
    ref_clv = 8
    # 3 hard-clipped bases followed by 6 matched bases.
    cigartuples = ((S.BAM_CHARD_CLIP, 3), (S.BAM_CMATCH, 6))
    ctg_clv = 6
    ctg_seq = 'CGCATTCGT'

    assert init_ref_end(ref_clv, cigartuples, ctg_clv, ctg_seq) == 11
    assert init_ctg_end(ctg_seq) == 9
예제 #6
0
def test_init_ends_with_insertion():
    """The contig carries a 2-base insertion before the cleavage site.

         AG   AA      <-inserted bases
         ┬  GT┘       <-bridge read
       GA CGGTCGC     <-bridge contig
       01 45678901    <-contig coord
        x    |  1|
        x  cc^   ^ice
    ...GT CGGTCGC...  <-genome
       56 78901234    <-genome coord
             |   |
           rc^   ^ire

    Legend: cc = ctg_clv, rc = ref_clv; ice / ire mark the contig /
    reference end coordinates asserted below.
    """
    contig = 'GAAGCGGTCGC'
    cigar = (
        (S.BAM_CMATCH, 2),
        (S.BAM_CINS, 2),
        (S.BAM_CMATCH, 7),
    )
    clv_on_ref, clv_on_ctg = 10, 7

    ref_end = init_ref_end(clv_on_ref, cigar, clv_on_ctg, contig)
    assert ref_end == 14

    ctg_end = init_ctg_end(contig)
    assert ctg_end == 11
예제 #7
0
def test_init_ends_with_multiple_skips_before_clv():
    """Two separate reference skips precede the cleavage site.

               AA
             TA┘       <-bridge read
       G-C--CTAGC      <-bridge contig
       0 1  234567     <-contig coord
        ||| x ^cc^ice
    ...GACTGGTAGC...   <-genome
       56789012345     <-genome coord
            1 |  |
            rc^  ^ire

    Legend: cc = ctg_clv, rc = ref_clv; ice / ire mark the contig /
    reference end coordinates asserted below.
    """
    contig = 'GCCTAGC'
    cigar = (
        (S.BAM_CMATCH, 1),
        (S.BAM_CREF_SKIP, 1),
        (S.BAM_CMATCH, 1),
        (S.BAM_CREF_SKIP, 2),
        (S.BAM_CMATCH, 5),
    )
    clv_on_ref, clv_on_ctg = 12, 4

    ref_end = init_ref_end(clv_on_ref, cigar, clv_on_ctg, contig)
    assert ref_end == 15

    ctg_end = init_ctg_end(contig)
    assert ctg_end == 7
예제 #8
0
def test_bridge_init_ends_with_hardclip_after_clv():
    """The contig is hard-clipped downstream of the cleavage site.

             AAA
          GTT┘                  <-bridge read
       A-GGTTGCAGA              <-suffix contig
       | |  |  ///              <-hardclip mask
       0 1234567890             <-contig coord
          cc^     ^ice
    ...ACGGTTGC...              <-genome
       789012345678             <-genome coord
          1 |     |
          rc^     ^init_ref_idx

    Legend: cc = ctg_clv, rc = ref_clv; ice / init_ref_idx mark the
    contig / reference end coordinates asserted below.
    """
    contig = 'AGGTTGCAGA'
    cigar = (
        (S.BAM_CMATCH, 1),
        (S.BAM_CREF_SKIP, 1),
        (S.BAM_CMATCH, 6),
        (S.BAM_CHARD_CLIP, 3),
    )
    clv_on_ref, clv_on_ctg = 12, 4

    ref_end = init_ref_end(clv_on_ref, cigar, clv_on_ctg, contig)
    assert ref_end == 18

    ctg_end = init_ctg_end(contig)
    assert ctg_end == 10
예제 #9
0
def test_bridge_init_ends_with_softclip_after_clv():
    r"""Hard clip before and non-polyA soft clip after the cleavage site.

    The docstring is raw because the hardclip mask row is drawn with
    backslashes; in a normal string literal they would be parsed as
    (partly invalid) escape sequences.

              AA       <-polyA clip
            TC┘|       <-bridge read
            ||  CC     <-non-polyA softclip
       CGCATTCGT┘|     <-bridge contig (hardclipped, could be chimeric https://www.biostars.org/p/109333/)
       \\\   |
       012345678901    <-contig coord
           cc^    ^ice
    ...CGCATTCGTXX...  <-genome
       234567890123    <-genome coord
             | 1  |
           rc^    ^ire

    Legend: cc = ctg_clv, rc = ref_clv; ice / ire mark the contig /
    reference end coordinates asserted below.
    """
    ref_clv = 8
    # 3 hard-clipped bases, 6 matched bases, then 2 soft-clipped bases.
    cigartuples = (
        (S.BAM_CHARD_CLIP, 3),
        (S.BAM_CMATCH, 6),
        (S.BAM_CSOFT_CLIP, 2),
    )
    ctg_clv = 6
    ctg_seq = 'CGCATTCGTCC'

    assert init_ref_end(ref_clv, cigartuples, ctg_clv, ctg_seq) == 13
    assert init_ctg_end(ctg_seq) == 11
예제 #10
0
def test_bridge_init_ends_with_sofclip_before_clv():
    """Non-polyA soft clip before and hard clip after the cleavage site.

    NOTE: "sofclip" in the function name is a typo for "softclip"; the
    name is left as is so the test id does not change.

             AAA                <-polyA softclip
          GTT┘                  <-bridge read
    CCC   |||                   <-non-polyA softclip
    01└GA-GGTTGCAGA             <-suffix contig
       |  |||  ////             <-hardclip mask
       34 5678901234            <-contig coord
          cc^  |   ^ice
    ...ACGGTTGC...              <-genome
       7890123456789             <-genome coord
          1 |      |
          rc^      ^init_ref_idx

    Legend: cc = ctg_clv, rc = ref_clv; ice / init_ref_idx mark the
    contig / reference end coordinates asserted below.
    """
    contig = 'CCCGAGGTTGCAGA'
    cigar = (
        (S.BAM_CSOFT_CLIP, 3),
        (S.BAM_CMATCH, 2),
        (S.BAM_CREF_SKIP, 1),
        (S.BAM_CMATCH, 5),
        (S.BAM_CHARD_CLIP, 4),
    )
    clv_on_ref, clv_on_ctg = 12, 7

    ref_end = init_ref_end(clv_on_ref, cigar, clv_on_ctg, contig)
    assert ref_end == 19

    ctg_end = init_ctg_end(contig)
    assert ctg_end == 14