def test_postprocess_fill_gaps(self, existing_file, existing_aln, ref_seq, fill_gaps):
    """Gaps must be turned into "N" by postprocess exactly when ``fill_gaps`` is set.

    Runs ``align.postprocess`` in place on ``existing_file``, re-reads the
    file as FASTA, and compares every position that held "-" in the
    corresponding ``existing_aln`` record against the post-processed record.

    ``ref_seq`` is requested as a fixture but is not used in the body.
    """
    align.postprocess(existing_file, None, True, fill_gaps)
    processed = SeqIO.to_dict(SeqIO.parse(existing_file, "fasta"))

    for strain, after in processed.items():
        before = existing_aln[strain].seq
        for pos, base in enumerate(before):
            # Only positions that were gaps before post-processing matter.
            if base != "-":
                continue
            became_ambiguous = after.seq[pos] == "N"
            assert became_ambiguous == fill_gaps
def test_postprocess_prettify_alignment(self, tmpdir, existing_aln, ref_seq):
    """Postprocess must drop the "_R_" prefix and upper-case every sequence.

    Writes ``ref_seq`` (with "_R_" prepended to its ``name``) together with
    all records of ``existing_aln`` to a new file, runs ``align.postprocess``
    on it, and checks each record read back from that file.
    """
    # NOTE(review): only ``ref_seq.name`` gets the prefix; ``ref_seq.id`` is
    # left untouched. Whether the prefix reaches the written file depends on
    # how ``write_strains`` serialises records -- confirm this test really
    # exercises the prefix-stripping path.
    ref_seq.name = "_R_" + ref_seq.name

    strains = [ref_seq]
    strains.extend(existing_aln.values())
    aligned_path = write_strains(tmpdir, "post_align", strains)

    align.postprocess(aligned_path, None, True, False)
    parsed = SeqIO.to_dict(SeqIO.parse(aligned_path, "fasta"))

    for strain, entry in parsed.items():
        # NOTE(review): no lowercase input is built here; presumably the
        # fixtures supply some -- verify.
        sequence = entry.seq
        assert sequence == sequence.upper(), "Sequence was not made uppercase"
        assert "_R_" not in strain, "Reverse-complement prefix not removed"
def test_postprocess_strip_non_reference(self, tmpdir, ref_seq, ref_file):
    """Columns that are gaps in the reference must be stripped from every record.

    Gaps that belong only to a non-reference sequence must survive. The test
    builds a copy of the reference sequence with a single "-" written at
    index 1, stores it under the id "GAP" next to the reference, and runs
    ``align.postprocess`` with ``ref_seq.id`` as the reference name.

    ``ref_file`` is requested as a fixture but is not used in the body.
    """
    # Length the reference would have once its own gap characters are gone.
    reference_length = len(ref_seq.seq)
    reference_gaps = ref_seq.seq.count("-")
    expected_length = reference_length - reference_gaps

    # Same residues as the reference, plus one gap at index 1.
    with_gap = MutableSeq(str(ref_seq.seq))
    with_gap[1] = "-"
    gap_record = SeqRecord(with_gap, "GAP")

    gap_path = write_strains(tmpdir, "gaps", [ref_seq, gap_record])
    align.postprocess(gap_path, ref_seq.id, True, False)
    parsed = SeqIO.to_dict(SeqIO.parse(gap_path, "fasta"))

    reference_after = parsed[ref_seq.id].seq
    assert "-" not in reference_after

    remaining_gaps = parsed["GAP"].seq.count("-")
    assert remaining_gaps == 1

    for entry in parsed.values():
        assert len(entry.seq) == expected_length
def test_postprocess_remove_reference(self, existing_with_ref, ref_seq, keep_ref):
    """The reference strain must stay in the output exactly when ``keep_ref`` is set.

    Runs ``align.postprocess`` in place on ``existing_with_ref``, passing
    ``ref_seq.id`` as the reference name and ``keep_ref`` as the third
    argument, then checks whether the reference id is still among the
    records read back from the file.
    """
    align.postprocess(existing_with_ref, ref_seq.id, keep_ref, False)
    remaining = SeqIO.to_dict(SeqIO.parse(existing_with_ref, "fasta"))

    reference_present = ref_seq.id in remaining
    assert reference_present == keep_ref