def test_partition_variant_reads_deletion():
    """Check that alt reads gathered for a chr12 deletion carry an empty allele.

    The variant has a 19-base ref and an empty alt. More than one alt read
    must be collected, and every alt read's ``allele`` must equal the
    (empty) alt string.
    """
    deleted_bases = "TTGTAGATGCTGCCTCTCC"
    empty_alt = ""

    bam = load_bam("data/cancer-wgs-primary.chr12.bam")
    deletion = Variant(
        contig="chr12",
        start=70091490,
        ref=deleted_bases,
        alt=empty_alt,
        ensembl=ensembl_grch38)
    evidence = ReadCollector().read_evidence_for_variant(
        alignment_file=bam,
        variant=deletion)

    # A single supporting read is not enough for this test to pass.
    assert len(evidence.alt_reads) > 1
    for supporting_read in evidence.alt_reads:
        eq_(supporting_read.allele, empty_alt)
def test_partition_variant_reads_snv():
    """Check that alt reads gathered for a chr12 G>C substitution carry "C".

    More than one alt read must be collected, and every alt read's
    ``allele`` must equal the alt nucleotide.
    """
    alt_base = "C"

    bam = load_bam("data/cancer-wgs-primary.chr12.bam")
    snv = Variant(
        contig="chr12",
        start=65857041,
        ref="G",
        alt=alt_base,
        ensembl=ensembl_grch38)
    evidence = ReadCollector().read_evidence_for_variant(
        alignment_file=bam,
        variant=snv)

    supporting_reads = evidence.alt_reads
    # A single supporting read is not enough for this test to pass.
    assert len(supporting_reads) > 1
    for supporting_read in supporting_reads:
        eq_(supporting_read.allele, alt_base)
def test_group_unique_sequences():
    """Check that grouping variant-supporting reads yields fewer groups than reads.

    Reads supporting a chr12 G>C substitution are collected and passed to
    ``group_unique_sequences`` with prefix and suffix sizes capped at 30.
    The number of resulting groups must be strictly smaller than the number
    of input reads.
    """
    bam = load_bam("data/cancer-wgs-primary.chr12.bam")
    snv = Variant(
        contig="chr12",
        start=65857041,
        ref="G",
        alt="C",
        ensembl="hg38")
    supporting_reads = ReadCollector().allele_reads_supporting_variant(
        alignment_file=bam,
        variant=snv)
    print("%d variant reads: %s" % (len(supporting_reads), supporting_reads))

    unique_groups = group_unique_sequences(
        supporting_reads,
        max_prefix_size=30,
        max_suffix_size=30)
    print("%d unique sequences: %s" % (len(unique_groups), unique_groups))

    # Some of the reads are redundant, so collapsing them into unique
    # sequences has to produce fewer entries than there were reads.
    assert len(supporting_reads) > len(unique_groups)
def test_translate_variant_collection():
    """Check how many translations are produced for the B16-F10 variant set.

    Variants are loaded from the B16-F10 VCF, read evidence for them is
    generated from the combined sorted BAM, and the evidence is fed to
    ``ProteinSequenceCreator.translate_variants``. Exactly
    ``expected_translation_count`` translations must come back.
    """
    # Single source of truth for the expected count, so the failure message
    # always reports the same number the assertion actually checks.
    expected_translation_count = 4

    variants = load_vcf("data/b16.f10/b16.vcf")
    samfile = load_bam("data/b16.f10/b16.combined.sorted.bam")
    read_evidence_gen = ReadCollector().read_evidence_generator(
        variants, samfile)
    translation_gen = ProteinSequenceCreator().translate_variants(
        read_evidence_gen)
    # Materialize the generator so its results can be counted and printed.
    translations = list(translation_gen)

    eq_(
        len(translations),
        expected_translation_count,
        "Expected %d translated variants but got %d: %s" % (
            expected_translation_count,
            len(translations),
            translations))