예제 #1
0
def test_locus_reads_insertion():
    """
    Check the LocusRead built for a read carrying the insertion T>TG.

    The variant sits at base-1 position 4 of a reference sequence assumed
    to be "ACCTTG", so the read sequence is "ACCTGTG" (CIGAR 4M1I2M).
    """
    insertion = Variant(
        "chromosome", 4, ref="T", alt="TG", normalize_contig_name=False)
    aligned_read = make_read(seq="ACCTGTG", cigar="4M1I2M", mdtag="6")
    fake_samfile = DummySamFile(reads=[aligned_read])

    # query the generator with the positions flanking the insertion
    query = dict(
        samfile=fake_samfile,
        chromosome="chromosome",
        base1_position_before_variant=insertion.start,
        base1_position_after_variant=insertion.start + 1)
    locus_reads = list(locus_read_generator(**query))
    print(locus_reads)

    n_reads = len(locus_reads)
    assert n_reads == 1, \
        "Expected to get back one read but instead got %d" % (n_reads,)

    observed = locus_reads[0]
    # the inserted nucleotide has no matching reference position, which is
    # why the fifth entry of reference_positions is None
    wanted = LocusRead(
        name=aligned_read.qname,
        sequence=aligned_read.query_sequence,
        reference_positions=[0, 1, 2, 3, None, 4, 5],
        quality_scores=aligned_read.query_qualities,
        base0_read_position_before_variant=3,
        base0_read_position_after_variant=5)
    assert_equal_fields(observed, wanted)
예제 #2
0
def test_locus_reads_deletion():
    """
    Check the LocusRead built for a read carrying the deletion TT>T.

    The variant is specified at base-1 position 4 of a reference sequence
    assumed to be "ACCTTG"; the read sequence is "ACCTG" (CIGAR 4M1D1M).
    """
    # normalization of this variant will turn it into the deletion of
    # "T" at base-1 position 5
    deletion = Variant(
        "chromosome", 4, ref="TT", alt="T", normalize_contig_name=False)
    print(deletion)

    aligned_read = make_read(seq="ACCTG", cigar="4M1D1M", mdtag="4^T1")
    fake_samfile = DummySamFile(reads=[aligned_read])

    # flank the deleted base: one position before and one after variant.start
    query = dict(
        samfile=fake_samfile,
        chromosome="chromosome",
        base1_position_before_variant=deletion.start - 1,
        base1_position_after_variant=deletion.start + 1)
    locus_reads = list(locus_read_generator(**query))
    print(locus_reads)

    n_reads = len(locus_reads)
    assert n_reads == 1, \
        "Expected to get back one read but instead got %d" % (n_reads,)

    observed = locus_reads[0]
    # reference position 4 is absent because no read base aligns to it
    wanted = LocusRead(
        name=aligned_read.qname,
        sequence=aligned_read.query_sequence,
        reference_positions=[0, 1, 2, 3, 5],
        quality_scores=aligned_read.query_qualities,
        base0_read_position_before_variant=3,
        base0_read_position_after_variant=4)
    assert_equal_fields(observed, wanted)
예제 #3
0
def test_locus_reads_substitution_longer():
    # C>GG substitution at the second nucleotide of the reference sequence
    # "ACCTTG"; the alignment is interpreted as a C>G variant followed by
    # the insertion of another G
    substitution = Variant(
        "chromosome", 2, ref="C", alt="GG", normalize_contig_name=False)
    print(substitution)

    aligned_read = make_read(seq="AGGCTTG", cigar="2M1I4M", mdtag="1C4")
    fake_samfile = DummySamFile(reads=[aligned_read])

    # the flanking base-1 positions are hard-coded rather than derived from
    # the variant object
    query = dict(
        samfile=fake_samfile,
        chromosome="chromosome",
        base1_position_before_variant=1,
        base1_position_after_variant=3)
    locus_reads = list(locus_read_generator(**query))
    print(locus_reads)

    n_reads = len(locus_reads)
    assert n_reads == 1, \
        "Expected to get back one read but instead got %d" % (n_reads,)

    observed = locus_reads[0]
    # the inserted G (third read base) has no reference position
    wanted = LocusRead(
        name=aligned_read.qname,
        sequence=aligned_read.query_sequence,
        reference_positions=[0, 1, None, 2, 3, 4, 5],
        quality_scores=aligned_read.query_qualities,
        base0_read_position_before_variant=0,
        base0_read_position_after_variant=3)
    assert_equal_fields(observed, wanted)
예제 #4
0
def test_locus_reads_substitution_shorter():
    # CC>G substitution at the 2nd and 3rd nucleotides of the reference
    # sequence "ACCTTG"; the alignment is interpreted as a C>G variant
    # followed by the deletion of a C
    substitution = Variant(
        "chromosome", 2, ref="CC", alt="G", normalize_contig_name=False)
    print(substitution)

    aligned_read = make_read(seq="AGTTG", cigar="2M1D3M", mdtag="1C^C4")
    fake_samfile = DummySamFile(reads=[aligned_read])

    # the flanking base-1 positions are hard-coded rather than derived from
    # the variant object
    query = dict(
        samfile=fake_samfile,
        chromosome="chromosome",
        base1_position_before_variant=1,
        base1_position_after_variant=4)
    locus_reads = list(locus_read_generator(**query))

    # the count is checked before the reads are printed
    n_reads = len(locus_reads)
    assert n_reads == 1, \
        "Expected to get back one read but instead got %d" % (n_reads,)
    print(locus_reads)

    observed = locus_reads[0]
    # reference position 2 is absent because no read base aligns to it
    wanted = LocusRead(
        name=aligned_read.qname,
        sequence=aligned_read.query_sequence,
        reference_positions=[0, 1, 3, 4, 5],
        quality_scores=aligned_read.query_qualities,
        base0_read_position_before_variant=0,
        base0_read_position_after_variant=2)
    assert_equal_fields(observed, wanted)
예제 #5
0
def test_locus_reads_snv():
    """
    Check the LocusRead built for a read carrying the SNV T>G.

    The variant sits at base-1 position 4 of a reference sequence assumed
    to be "ACCTTG", so the read sequence is "ACCGTG" (CIGAR 6M).
    """
    snv = Variant(
        "chromosome", 4, ref="T", alt="G", normalize_contig_name=False)
    aligned_read = make_read(seq="ACCGTG", cigar="6M", mdtag="3G2")
    fake_samfile = DummySamFile(reads=[aligned_read])

    # flank the substituted base: one position on either side of snv.start
    query = dict(
        samfile=fake_samfile,
        chromosome="chromosome",
        base1_position_before_variant=snv.start - 1,
        base1_position_after_variant=snv.start + 1)
    locus_reads = list(locus_read_generator(**query))
    print(locus_reads)

    n_reads = len(locus_reads)
    assert n_reads == 1, \
        "Expected to get back one read but instead got %d" % (n_reads,)

    observed = locus_reads[0]
    # every read base has a reference position since the CIGAR is all-match
    wanted = LocusRead(
        name=aligned_read.qname,
        sequence=aligned_read.query_sequence,
        reference_positions=[0, 1, 2, 3, 4, 5],
        quality_scores=aligned_read.query_qualities,
        base0_read_position_before_variant=2,
        base0_read_position_after_variant=4)
    assert_equal_fields(observed, wanted)
예제 #6
0
def test_locus_reads_substitution_longer():
    # C>GG substitution at the second nucleotide of the reference sequence
    # "ACCTTG"; the alignment is interpreted as a C>G variant followed by
    # the insertion of another G
    longer_alt = Variant(
        "chromosome", 2, ref="C", alt="GG", normalize_contig_name=False)
    print(longer_alt)

    source_read = make_read(seq="AGGCTTG", cigar="2M1I4M", mdtag="1C4")
    mock_sam = DummySamFile(reads=[source_read])

    # the flanking base-1 positions are given as literals here
    generated = locus_read_generator(
        samfile=mock_sam,
        chromosome="chromosome",
        base1_position_before_variant=1,
        base1_position_after_variant=3)
    collected = list(generated)
    print(collected)

    count = len(collected)
    assert count == 1, \
        "Expected to get back one read but instead got %d" % (count,)

    actual = collected[0]
    # None marks the inserted G, which has no reference position
    target = LocusRead(
        name=source_read.qname,
        sequence=source_read.query_sequence,
        reference_positions=[0, 1, None, 2, 3, 4, 5],
        quality_scores=source_read.query_qualities,
        base0_read_position_before_variant=0,
        base0_read_position_after_variant=3)
    assert_equal_fields(actual, target)
예제 #7
0
def test_locus_reads_substitution_shorter():
    # CC>G substitution at the 2nd and 3rd nucleotides of the reference
    # sequence "ACCTTG"; the alignment is interpreted as a C>G variant
    # followed by the deletion of a C
    shorter_alt = Variant(
        "chromosome", 2, ref="CC", alt="G", normalize_contig_name=False)
    print(shorter_alt)

    source_read = make_read(seq="AGTTG", cigar="2M1D3M", mdtag="1C^C4")
    mock_sam = DummySamFile(reads=[source_read])

    # the flanking base-1 positions are given as literals here
    generated = locus_read_generator(
        samfile=mock_sam,
        chromosome="chromosome",
        base1_position_before_variant=1,
        base1_position_after_variant=4)
    collected = list(generated)

    # the count is checked before the reads are printed
    count = len(collected)
    assert count == 1, \
        "Expected to get back one read but instead got %d" % (count,)
    print(collected)

    actual = collected[0]
    # reference position 2 is missing: no read base aligns to the deleted C
    target = LocusRead(
        name=source_read.qname,
        sequence=source_read.query_sequence,
        reference_positions=[0, 1, 3, 4, 5],
        quality_scores=source_read.query_qualities,
        base0_read_position_before_variant=0,
        base0_read_position_after_variant=2)
    assert_equal_fields(actual, target)
예제 #8
0
def test_locus_reads_deletion():
    """
    Check the LocusRead collected for a read carrying the deletion TT>T.

    The variant is specified at base-1 position 4 of a reference sequence
    assumed to be "ACCTTG"; the read sequence is "ACCTG" (CIGAR 4M1D1M).
    """
    # normalization of this variant will turn it into the deletion of
    # "T" at base-1 position 5
    deletion = Variant("1", 4, ref="TT", alt="T")
    aligned_read = make_pysam_read(seq="ACCTG", cigar="4M1D1M", mdtag="4^T1")

    alignment_file = MockAlignmentFile(
        references={"chromosome"},
        reads=[aligned_read])
    collector = ReadCollector()
    locus_reads = collector.get_locus_reads(
        alignment_file,
        "chromosome",
        deletion.start - 1,
        deletion.start)
    print(locus_reads)

    n_reads = len(locus_reads)
    assert n_reads == 1, \
        "Expected to get back one read but instead got %d" % (n_reads,)

    observed = locus_reads[0]
    # the deleted base would have followed the 4th nucleotide of the read,
    # so the read interval is empty (start == end == 4) while the reference
    # interval covers the single deleted position
    wanted = LocusRead(
        name=aligned_read.qname,
        sequence=aligned_read.query_sequence,
        reference_positions=[0, 1, 2, 3, 5],
        quality_scores=aligned_read.query_qualities,
        read_base0_start_inclusive=4,
        read_base0_end_exclusive=4,
        reference_base0_start_inclusive=4,
        reference_base0_end_exclusive=5)
    assert_equal_fields(observed, wanted)
예제 #9
0
def test_locus_reads_insertion():
    """
    Check the LocusRead collected for a read carrying the insertion T>TG.

    The variant sits at base-1 position 4 of a reference sequence assumed
    to be "ACCTTG", so the read sequence is "ACCTGTG" (CIGAR 4M1I2M).
    """
    insertion = Variant("1", 4, ref="T", alt="TG")
    aligned_read = make_pysam_read(seq="ACCTGTG", cigar="4M1I2M", mdtag="6")

    alignment_file = MockAlignmentFile(
        references={"chromosome"},
        reads=[aligned_read])
    collector = ReadCollector()
    # both interval arguments are variant.start for an insertion
    locus_reads = collector.get_locus_reads(
        alignment_file,
        "chromosome",
        insertion.start,
        insertion.start)
    print(locus_reads)

    n_reads = len(locus_reads)
    assert n_reads == 1, \
        "Expected to get back one read but instead got %d" % (n_reads,)

    observed = locus_reads[0]
    # the inserted nucleotide has no matching reference position (None), so
    # the reference interval is empty while the read interval spans one base
    wanted = LocusRead(
        name=aligned_read.qname,
        sequence=aligned_read.query_sequence,
        reference_positions=[0, 1, 2, 3, None, 4, 5],
        quality_scores=aligned_read.query_qualities,
        read_base0_start_inclusive=4,
        read_base0_end_exclusive=5,
        reference_base0_start_inclusive=4,
        reference_base0_end_exclusive=4)
    print("Actual: %s" % (observed, ))
    print("Expected: %s" % (wanted, ))
    assert_equal_fields(observed, wanted)
예제 #10
0
def test_locus_reads_substitution_shorter():
    # CC>G substitution at the 2nd and 3rd nucleotides of the reference
    # sequence "ACCTTG"; the alignment is interpreted as a C>G variant
    # followed by the deletion of a C
    substitution = Variant("1", 2, ref="CC", alt="G")
    print(substitution)

    aligned_read = make_pysam_read(seq="AGTTG", cigar="2M1D3M", mdtag="1C^C4")
    alignment_file = MockAlignmentFile(
        references={"chromosome"},
        reads=[aligned_read])
    collector = ReadCollector()
    # the interval arguments are given as literals here
    locus_reads = collector.get_locus_reads(alignment_file, "chromosome", 1, 3)

    # the count is checked before the reads are printed
    n_reads = len(locus_reads)
    assert n_reads == 1, \
        "Expected to get back one read but instead got %d" % (n_reads,)
    print(locus_reads)

    observed = locus_reads[0]
    # one read base stands in for two reference bases
    wanted = LocusRead(
        name=aligned_read.qname,
        sequence=aligned_read.query_sequence,
        reference_positions=[0, 1, 3, 4, 5],
        quality_scores=aligned_read.query_qualities,
        read_base0_start_inclusive=1,
        read_base0_end_exclusive=2,
        reference_base0_start_inclusive=1,
        reference_base0_end_exclusive=3)
    assert_equal_fields(observed, wanted)
예제 #11
0
def test_locus_reads_snv():
    """
    Check the LocusRead collected for a read carrying the SNV T>G.

    The variant sits at base-1 position 4 of a reference sequence assumed
    to be "ACCTTG", so the read sequence is "ACCGTG" (CIGAR 6M).
    """
    snv = Variant("1", 4, ref="T", alt="G")
    aligned_read = make_pysam_read(seq="ACCGTG", cigar="6M", mdtag="3G2")

    alignment_file = MockAlignmentFile(
        references=("chromosome", ),
        reads=[aligned_read])
    collector = ReadCollector()
    locus_reads = collector.get_locus_reads(
        alignment_file,
        "chromosome",
        snv.start - 1,
        snv.start)
    print(locus_reads)

    n_reads = len(locus_reads)
    assert n_reads == 1, \
        "Expected to get back one read but instead got %d" % (n_reads,)

    observed = locus_reads[0]
    # a single substituted base: read and reference intervals both cover
    # exactly one position
    wanted = LocusRead(
        name=aligned_read.qname,
        sequence=aligned_read.query_sequence,
        reference_positions=[0, 1, 2, 3, 4, 5],
        quality_scores=aligned_read.query_qualities,
        reference_base0_start_inclusive=3,
        reference_base0_end_exclusive=4,
        read_base0_start_inclusive=3,
        read_base0_end_exclusive=4)
    assert_equal_fields(observed, wanted)
예제 #12
0
def test_locus_reads_substitution_longer():
    # C>GG substitution at the second nucleotide of the reference sequence
    # "ACCTTG"; the alignment is interpreted as a C>G variant followed by
    # the insertion of another G
    substitution = Variant("1", 2, ref="C", alt="GG")
    print(substitution)

    aligned_read = make_pysam_read(seq="AGGCTTG", cigar="2M1I4M", mdtag="1C4")
    alignment_file = MockAlignmentFile(
        references={"chromosome"},
        reads=[aligned_read])
    collector = ReadCollector()
    # the interval arguments are given as literals here
    locus_reads = collector.get_locus_reads(alignment_file, "chromosome", 1, 2)
    print(locus_reads)

    n_reads = len(locus_reads)
    assert n_reads == 1, \
        "Expected to get back one read but instead got %d" % (n_reads,)

    observed = locus_reads[0]
    # two read bases stand in for one reference base; the inserted G has no
    # reference position (None)
    wanted = LocusRead(
        name=aligned_read.qname,
        sequence=aligned_read.query_sequence,
        reference_positions=[0, 1, None, 2, 3, 4, 5],
        quality_scores=aligned_read.query_qualities,
        read_base0_start_inclusive=1,
        read_base0_end_exclusive=3,
        reference_base0_start_inclusive=1,
        reference_base0_end_exclusive=2)
    assert_equal_fields(observed, wanted)
예제 #13
0
def test_locus_reads_insertion():
    """
    Check the LocusRead generated for a read carrying the insertion T>TG.

    The variant sits at base-1 position 4 of a reference sequence assumed
    to be "ACCTTG", so the read sequence is "ACCTGTG" (CIGAR 4M1I2M).
    """
    inserted = Variant(
        "chromosome", 4, ref="T", alt="TG", normalize_contig_name=False)
    source_read = make_read(seq="ACCTGTG", cigar="4M1I2M", mdtag="6")
    mock_sam = DummySamFile(reads=[source_read])

    # query with the positions on either side of the inserted base
    generated = locus_read_generator(
        samfile=mock_sam,
        chromosome="chromosome",
        base1_position_before_variant=inserted.start,
        base1_position_after_variant=inserted.start + 1)
    collected = list(generated)
    print(collected)

    count = len(collected)
    assert count == 1, \
        "Expected to get back one read but instead got %d" % (count,)

    actual = collected[0]
    # None marks the inserted nucleotide, which has no reference position
    target = LocusRead(
        name=source_read.qname,
        sequence=source_read.query_sequence,
        reference_positions=[0, 1, 2, 3, None, 4, 5],
        quality_scores=source_read.query_qualities,
        base0_read_position_before_variant=3,
        base0_read_position_after_variant=5)
    assert_equal_fields(actual, target)