def test_contig_coverage_report_offset_reads(projects, sequence_report):
    """ With consensus_offset=50, rows start at query position 51. """
    seed_name = 'HIV1-B-FR-K03455-seed'
    reference = projects.getReference(seed_name)
    contig_seq = reference[50:150]
    # The first 50 entries carry no counts; every later entry saw a single C.
    uncounted = [('C', SeedNucleotide())] * 50
    counted = [('C', SeedNucleotide(Counter({'C': 1})))] * len(contig_seq)
    nuc_entries = uncounted + counted
    expected_head = """\
contig,coordinates,query_nuc_pos,refseq_nuc_pos,dels,coverage,link
1-my-contig,HIV1-B-FR-K03455-seed,51,51,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,52,52,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,53,53,0,1,M
"""
    output = StringIO()
    sequence_report.projects = projects

    sequence_report.write_genome_coverage_header(output)
    sequence_report.write_sequence_coverage_counts('1-my-contig',
                                                   seed_name,
                                                   contig_seq,
                                                   consensus_offset=50,
                                                   seed_nucs=nuc_entries)

    # Only the start of the report is compared.
    written = output.getvalue()
    assert written[:len(expected_head)] == expected_head
def test_write_sequence_coverage_counts_with_some_deletions(
        projects,
        sequence_report):
    """ Some reads had deletions at a position.

    The sixth entry counts four G's and two deletions, so its row is
    expected to show dels=2 and coverage=6.
    """
    seed_name = 'HIV1-B-FR-K03455-seed'
    reference = projects.getReference(seed_name)
    contig_seq = reference[100:150]
    single_c = ('C', SeedNucleotide(Counter({'C': 1})))
    nuc_entries = [single_c] * len(contig_seq)
    nuc_entries[5] = ('G', SeedNucleotide(Counter({'G': 4, '-': 2})))
    expected_head = """\
contig,coordinates,query_nuc_pos,refseq_nuc_pos,dels,coverage,link
1-my-contig,HIV1-B-FR-K03455-seed,1,101,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,2,102,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,3,103,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,4,104,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,5,105,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,6,106,2,6,M
1-my-contig,HIV1-B-FR-K03455-seed,7,107,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,8,108,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,9,109,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,10,110,0,1,M
"""
    output = StringIO()
    sequence_report.projects = projects

    sequence_report.write_genome_coverage_header(output)
    sequence_report.write_sequence_coverage_counts('1-my-contig',
                                                   seed_name,
                                                   contig_seq,
                                                   seed_nucs=nuc_entries)

    # Only the start of the report is compared.
    written = output.getvalue()
    assert written[:len(expected_head)] == expected_head
def test_write_sequence_coverage_counts_with_coverage(projects,
                                                      sequence_report):
    """ Per-position counts show up in the coverage column.

    The contig joins reference slices [100:150] and [1000:1050]; entries 3
    and 99 carry counts of 4 and 5 instead of 1.
    """
    seed_name = 'HIV1-B-FR-K03455-seed'
    reference = projects.getReference(seed_name)
    contig_seq = reference[100:150] + reference[1000:1050]
    single_c = ('C', SeedNucleotide(Counter({'C': 1})))
    nuc_entries = [single_c] * 100
    nuc_entries[2] = ('G', SeedNucleotide(Counter({'G': 4})))
    nuc_entries[98] = ('T', SeedNucleotide(Counter({'T': 5})))
    expected_head = """\
contig,coordinates,query_nuc_pos,refseq_nuc_pos,dels,coverage,link
1-my-contig,HIV1-B-FR-K03455-seed,1,101,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,2,102,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,3,103,0,4,M
1-my-contig,HIV1-B-FR-K03455-seed,4,104,0,1,M
"""
    expected_tail = """\
1-my-contig,HIV1-B-FR-K03455-seed,98,1048,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,99,1049,0,5,M
1-my-contig,HIV1-B-FR-K03455-seed,100,1050,0,1,M
"""
    output = StringIO()
    sequence_report.projects = projects

    sequence_report.write_genome_coverage_header(output)
    sequence_report.write_sequence_coverage_counts('1-my-contig',
                                                   seed_name,
                                                   contig_seq,
                                                   seed_nucs=nuc_entries)

    # Compare both ends of the report; the middle rows are not checked.
    written = output.getvalue()
    assert written[:len(expected_head)] == expected_head
    assert written[-len(expected_tail):] == expected_tail
def choose_consensus(nuc_row: dict) -> str:
    """ Pick the consensus text for one row of nucleotide counts.

    :param nuc_row: row with a 'coverage' entry, an 'ins' entry, and one
        entry per item in SeedNucleotide.COUNTED_NUCS, where the '-' count
        is read from the 'del' entry. Every value is passed through int().
    :return: 'x' when coverage is below 100; otherwise the result of
        SeedNucleotide.get_consensus(MAX_CUTOFF), with 'i' appended when the
        'ins' count is more than half of the coverage.
    """
    depth = int(nuc_row['coverage'])
    if depth < 100:
        return 'x'

    tally = SeedNucleotide()
    for counted_nuc in tally.COUNTED_NUCS:
        # Deletions are counted as '-', but the row stores them under 'del'.
        column = counted_nuc if counted_nuc != '-' else 'del'
        tally.count_nucleotides(counted_nuc, int(nuc_row[column]))

    result = tally.get_consensus(MAX_CUTOFF)
    insert_count = int(nuc_row['ins'])
    return (result + 'i') if insert_count > depth / 2 else result
def test_contig_coverage_report_past_reference_start(projects,
                                                     sequence_report):
    """ Five extra bases before the reference start.

    The extra bases are expected at reference positions -4 to 0 with link U.
    """
    seed_name = 'HIV1-B-FR-K03455-seed'
    reference = projects.getReference(seed_name)
    contig_seq = 'CGTAC' + reference[:100]
    single_c = ('C', SeedNucleotide(Counter({'C': 1})))
    nuc_entries = [single_c] * len(contig_seq)

    # link is (M)apped, (U)nmapped, or (I)nserted
    expected_head = """\
contig,coordinates,query_nuc_pos,refseq_nuc_pos,dels,coverage,link
1-my-contig,HIV1-B-FR-K03455-seed,1,-4,0,1,U
1-my-contig,HIV1-B-FR-K03455-seed,2,-3,0,1,U
1-my-contig,HIV1-B-FR-K03455-seed,3,-2,0,1,U
1-my-contig,HIV1-B-FR-K03455-seed,4,-1,0,1,U
1-my-contig,HIV1-B-FR-K03455-seed,5,0,0,1,U
1-my-contig,HIV1-B-FR-K03455-seed,6,1,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,7,2,0,1,M
"""
    output = StringIO()
    sequence_report.projects = projects

    sequence_report.write_genome_coverage_header(output)
    sequence_report.write_sequence_coverage_counts('1-my-contig',
                                                   seed_name,
                                                   contig_seq,
                                                   seed_nucs=nuc_entries)

    # Only the start of the report is compared.
    written = output.getvalue()
    assert written[:len(expected_head)] == expected_head
def test_contig_coverage_report_past_reference_end(projects, sequence_report):
    """ Five extra bases after the reference end.

    The extra bases are expected at reference positions 9720 to 9724 with
    link U.
    """
    seed_name = 'HIV1-B-FR-K03455-seed'
    reference = projects.getReference(seed_name)
    # The expected positions below depend on this reference length.
    assert len(reference) == 9719
    contig_seq = reference[-100:] + 'CGTAC'
    single_c = ('C', SeedNucleotide(Counter({'C': 1})))
    nuc_entries = [single_c] * len(contig_seq)
    expected_tail = """\
1-my-contig,HIV1-B-FR-K03455-seed,99,9718,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,100,9719,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,101,9720,0,1,U
1-my-contig,HIV1-B-FR-K03455-seed,102,9721,0,1,U
1-my-contig,HIV1-B-FR-K03455-seed,103,9722,0,1,U
1-my-contig,HIV1-B-FR-K03455-seed,104,9723,0,1,U
1-my-contig,HIV1-B-FR-K03455-seed,105,9724,0,1,U
"""
    output = StringIO()
    sequence_report.projects = projects

    sequence_report.write_genome_coverage_header(output)
    sequence_report.write_sequence_coverage_counts('1-my-contig',
                                                   seed_name,
                                                   contig_seq,
                                                   seed_nucs=nuc_entries)

    # Only the end of the report is compared.
    written = output.getvalue()
    assert written[-len(expected_tail):] == expected_tail
def test_write_sequence_coverage_counts_with_insert(projects, sequence_report):
    """ Five bases inserted after reference position 110.

    The inserted rows are expected to have a blank reference position and
    link I, and the row after them to resume at reference position 111.
    """
    seed_name = 'HIV1-B-FR-K03455-seed'
    reference = projects.getReference(seed_name)
    contig_seq = reference[100:110] + 'ACTGA' + reference[110:160]
    single_c = ('C', SeedNucleotide(Counter({'C': 1})))
    nuc_entries = [single_c] * len(contig_seq)
    nuc_entries[12] = ('T', SeedNucleotide(Counter({'T': 4})))
    expected_head = """\
contig,coordinates,query_nuc_pos,refseq_nuc_pos,dels,coverage,link
1-my-contig,HIV1-B-FR-K03455-seed,1,101,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,2,102,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,3,103,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,4,104,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,5,105,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,6,106,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,7,107,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,8,108,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,9,109,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,10,110,0,1,M
1-my-contig,HIV1-B-FR-K03455-seed,11,,0,1,I
1-my-contig,HIV1-B-FR-K03455-seed,12,,0,1,I
1-my-contig,HIV1-B-FR-K03455-seed,13,,0,4,I
1-my-contig,HIV1-B-FR-K03455-seed,14,,0,1,I
1-my-contig,HIV1-B-FR-K03455-seed,15,,0,1,I
1-my-contig,HIV1-B-FR-K03455-seed,16,111,0,1,M
"""
    output = StringIO()
    sequence_report.projects = projects

    sequence_report.write_genome_coverage_header(output)
    sequence_report.write_sequence_coverage_counts('1-my-contig',
                                                   seed_name,
                                                   contig_seq,
                                                   seed_nucs=nuc_entries)

    # Only the start of the report is compared.
    written = output.getvalue()
    assert written[:len(expected_head)] == expected_head