def test_partitioned_read_sequences_deletion():
    """
    A read supporting the deletion TT>T at position 4 should be split into
    prefix "ACCT", an empty allele and suffix "G".

    The reference sequence of the contig is assumed to be "ACCTTG"; the
    read used here is "ACCTG" aligned with CIGAR 4M1D1M.
    """
    contig = "chromosome"
    # the contig name is artificial, so contig-name normalization is disabled
    deletion = Variant(
        contig,
        4,
        "TT",
        "T",
        grch38,
        normalize_contig_name=False)
    pysam_read = make_pysam_read(seq="ACCTG", cigar="4M1D1M", mdtag="4^T1")
    alignments = MockAlignmentFile(references=(contig,), reads=[pysam_read])

    supporting_reads = ReadCollector().allele_reads_supporting_variant(
        alignment_file=alignments,
        variant=deletion)
    print(supporting_reads)
    assert len(supporting_reads) == 1

    actual = supporting_reads[0]
    # the deleted base leaves nothing between prefix and suffix
    expected = AlleleRead(
        name=pysam_read.qname,
        prefix="ACCT",
        allele="",
        suffix="G")
    eq_(actual, expected)
def test_locus_reads_deletion():
    """
    get_locus_reads should yield exactly one LocusRead for a read carrying
    the deletion chr1:4 TT>T, where the sequence for chr1 is assumed to be
    "ACCTTG".
    """
    # Normalizing TT>T at position 4 turns the variant into the deletion
    # of a single "T" at base-1 position 5.
    deletion = Variant("1", 4, ref="TT", alt="T")
    aligned = make_pysam_read(seq="ACCTG", cigar="4M1D1M", mdtag="4^T1")
    alignments = MockAlignmentFile(
        references={"chromosome"},
        reads=[aligned])
    collector = ReadCollector()

    base0_start = deletion.start - 1
    base0_end = deletion.start
    locus_reads = collector.get_locus_reads(
        alignments,
        "chromosome",
        base0_start,
        base0_end)
    print(locus_reads)

    n_reads = len(locus_reads)
    assert n_reads == 1, \
        "Expected to get back one read but instead got %d" % (n_reads,)

    actual = locus_reads[0]
    expected = LocusRead(
        name=aligned.qname,
        sequence=aligned.query_sequence,
        # reference position 4 is skipped because the read lacks that base
        reference_positions=[0, 1, 2, 3, 5],
        quality_scores=aligned.query_qualities,
        reference_base0_start_inclusive=4,
        reference_base0_end_exclusive=5,
        # the missing base would have gone after the 4th nucleotide of the
        # read, so the read interval is empty
        read_base0_start_inclusive=4,
        read_base0_end_exclusive=4)
    assert_equal_fields(actual, expected)
def test_locus_reads_insertion():
    """
    Check the LocusRead produced for a read carrying the insertion
    chr1:4 T>TG.

    The sequence for chr1 is assumed to be "ACCTTG", which makes the variant
    sequence (and the read used here) "ACCTGTG".
    """
    insertion = Variant("1", 4, ref="T", alt="TG")
    aligned = make_pysam_read(seq="ACCTGTG", cigar="4M1I2M", mdtag="6")
    alignments = MockAlignmentFile(
        references={"chromosome"},
        reads=[aligned])
    collector = ReadCollector()

    # both locus bounds are the variant's start position
    locus_reads = collector.get_locus_reads(
        alignments,
        "chromosome",
        insertion.start,
        insertion.start)
    print(locus_reads)

    n_reads = len(locus_reads)
    assert n_reads == 1, \
        "Expected to get back one read but instead got %d" % (n_reads,)

    actual = locus_reads[0]
    expected = LocusRead(
        name=aligned.qname,
        sequence=aligned.query_sequence,
        # the inserted nucleotide has no matching reference position,
        # hence the None in the fifth slot
        reference_positions=[0, 1, 2, 3, None, 4, 5],
        quality_scores=aligned.query_qualities,
        reference_base0_start_inclusive=4,
        reference_base0_end_exclusive=4,
        read_base0_start_inclusive=4,
        read_base0_end_exclusive=5)
    print("Actual: %s" % (actual, ))
    print("Expected: %s" % (expected, ))
    assert_equal_fields(actual, expected)
def test_locus_reads_substitution_shorter():
    # Exercise a CC>G substitution at the 2nd and 3rd nucleotides of the
    # reference sequence "ACCTTG". The alignment represents it as a C>G
    # mismatch followed by the deletion of a C.
    substitution = Variant("1", 2, ref="CC", alt="G")
    print(substitution)

    aligned = make_pysam_read(seq="AGTTG", cigar="2M1D3M", mdtag="1C^C4")
    alignments = MockAlignmentFile(
        references={"chromosome"},
        reads=[aligned])
    collector = ReadCollector()
    locus_reads = collector.get_locus_reads(alignments, "chromosome", 1, 3)

    n_reads = len(locus_reads)
    assert n_reads == 1, \
        "Expected to get back one read but instead got %d" % (n_reads,)
    print(locus_reads)

    actual = locus_reads[0]
    expected = LocusRead(
        name=aligned.qname,
        sequence=aligned.query_sequence,
        # reference position 2 is absent: that C is deleted in the read
        reference_positions=[0, 1, 3, 4, 5],
        quality_scores=aligned.query_qualities,
        reference_base0_start_inclusive=1,
        reference_base0_end_exclusive=3,
        read_base0_start_inclusive=1,
        read_base0_end_exclusive=2)
    assert_equal_fields(actual, expected)
def test_locus_reads_snv():
    """
    Check the LocusRead produced for a read carrying the SNV chr1:4 T>G,
    where the sequence for chr1 is assumed to be "ACCTTG".
    """
    snv = Variant("1", 4, ref="T", alt="G")
    aligned = make_pysam_read(seq="ACCGTG", cigar="6M", mdtag="3G2")
    alignments = MockAlignmentFile(
        references=("chromosome", ),
        reads=[aligned])
    collector = ReadCollector()

    base0_start = snv.start - 1
    base0_end = snv.start
    locus_reads = collector.get_locus_reads(
        alignments,
        "chromosome",
        base0_start,
        base0_end)
    print(locus_reads)

    n_reads = len(locus_reads)
    assert n_reads == 1, \
        "Expected to get back one read but instead got %d" % (n_reads,)

    actual = locus_reads[0]
    expected = LocusRead(
        name=aligned.qname,
        sequence=aligned.query_sequence,
        # a 6M alignment pairs every read base with a reference position
        reference_positions=[0, 1, 2, 3, 4, 5],
        quality_scores=aligned.query_qualities,
        read_base0_start_inclusive=3,
        read_base0_end_exclusive=4,
        reference_base0_start_inclusive=3,
        reference_base0_end_exclusive=4)
    assert_equal_fields(actual, expected)
def test_locus_reads_substitution_longer():
    # Exercise a C>GG substitution at the second nucleotide of the reference
    # sequence "ACCTTG". The alignment represents it as a C>G mismatch
    # followed by the insertion of another G.
    substitution = Variant("1", 2, ref="C", alt="GG")
    print(substitution)

    aligned = make_pysam_read(seq="AGGCTTG", cigar="2M1I4M", mdtag="1C4")
    alignments = MockAlignmentFile(
        references={"chromosome"},
        reads=[aligned])
    collector = ReadCollector()
    locus_reads = collector.get_locus_reads(alignments, "chromosome", 1, 2)
    print(locus_reads)

    n_reads = len(locus_reads)
    assert n_reads == 1, \
        "Expected to get back one read but instead got %d" % (n_reads,)

    actual = locus_reads[0]
    expected = LocusRead(
        name=aligned.qname,
        sequence=aligned.query_sequence,
        # the inserted G has no reference position of its own
        reference_positions=[0, 1, None, 2, 3, 4, 5],
        quality_scores=aligned.query_qualities,
        reference_base0_start_inclusive=1,
        reference_base0_end_exclusive=2,
        read_base0_start_inclusive=1,
        read_base0_end_exclusive=3)
    assert_equal_fields(actual, expected)