def test_prepare_with_alignment_ref_sequence_wrong_length(
        self, test_file, existing_file, ref_seq, ref_file):
    """A reference whose length differs from the existing alignment must be rejected."""
    # Trim the final three positions off the reference and overwrite the
    # reference file with the shortened record before calling prepare().
    truncated = ref_seq.seq[:-3]
    ref_seq.seq = truncated
    with open(ref_file, "w") as handle:
        SeqIO.write(ref_seq, handle, "fasta")

    sequence_files = [test_file]
    with pytest.raises(align.AlignmentError):
        align.prepare(sequence_files, existing_file, "out", None, ref_file)
def test_prepare_with_alignment_with_named_ref_missing(
        self, test_with_ref, existing_file, ref_seq):
    """A reference name plus an existing alignment that lacks that reference is an error.

    prepare() is expected to raise AlignmentError because the named reference
    is not part of the existing alignment.
    """
    sequence_files = [test_with_ref]
    reference_name = ref_seq.id
    with pytest.raises(align.AlignmentError):
        align.prepare(sequence_files, existing_file, "dontcare", reference_name, None)
def test_prepare_no_alignment_with_named_ref_missing(self, test_file, ref_seq):
    """A reference name that does not occur in the test file is an error.

    No existing alignment is supplied; prepare() is expected to raise
    AlignmentError because the named reference cannot be found.
    """
    sequence_files = [test_file]
    reference_name = ref_seq.id
    with pytest.raises(align.AlignmentError):
        align.prepare(sequence_files, None, "dontcare", reference_name, None)
def test_prepare_with_alignment_with_ref_seq(self, test_file, test_seqs, existing_file,
                                             existing_aln, ref_seq, ref_file, out_file):
    """Test that a reference file is added to the existing alignment and nothing else changes.

    Given a set of test sequences, an existing alignment, and a reference
    sequence file, prepare() should:

    * return the reference record's id as the reference name,
    * write the alignment (now including the reference) to a file other than
      the original alignment file,
    * keep every pre-existing alignment record unchanged, and
    * write the test sequences out unchanged.
    """
    aln_outfile, seqs_outfile, ref_name = align.prepare(
        [test_file], existing_file, out_file, None, ref_file)
    assert ref_name == ref_seq.id, "Didn't return strain name from refrence file"
    assert os.path.isfile(aln_outfile), "Didn't write existing alignment where it said"
    # Compare path with path. `existing_aln` is the parsed name -> record
    # mapping, so comparing it with a filename could never be equal and the
    # check would pass even if the original alignment file were overwritten.
    assert aln_outfile != existing_file, "Unexpectedly overwrote existing alignment"

    # Alignment file should have the reference added
    aln_output = SeqIO.to_dict(SeqIO.parse(aln_outfile, "fasta"))
    assert aln_output[ref_seq.id].seq == ref_seq.seq, "Reference sequence not added to alignment"
    for seq in existing_aln:
        assert seq in aln_output, "Some existing alignment sequences dropped unexpectedly"
        assert aln_output[seq].seq == existing_aln[seq].seq, "Some existing alignment sequences changed unexpectedly"

    # test sequences should be unchanged
    assert os.path.isfile(seqs_outfile), "Didn't write test sequences where it said"
    seq_output = SeqIO.to_dict(SeqIO.parse(seqs_outfile, "fasta"))
    for seq in test_seqs:
        assert seq in seq_output, "Some test sequences unexpectedly dropped"
        assert seq_output[seq].seq == test_seqs[seq].seq, "Some test sequences changed unexpectedly"
    # No extra records may appear alongside the test sequences.
    assert seq_output.keys() == test_seqs.keys()
def test_prepare_with_alignment_with_ref_name(self, test_file, test_seqs, existing_with_ref,
                                              existing_aln, ref_seq, out_file):
    """Check that naming a reference already in the alignment leaves everything untouched.

    Given test sequences, an existing alignment, and the name of a reference
    sequence, prepare() is expected to hand back the original alignment file
    and to write the test sequences out unmodified.
    """
    prepared = align.prepare([test_file], existing_with_ref, out_file, ref_seq.id, None)
    alignment_path, sequences_path, _ = prepared

    assert os.path.isfile(alignment_path), "Didn't write existing alignment where it said"
    assert alignment_path == existing_with_ref, "Rewrote the alignment file unexpectedly"

    # Alignment contents: reference still present, existing records intact.
    aligned = SeqIO.to_dict(SeqIO.parse(alignment_path, "fasta"))
    reference_record = aligned[ref_seq.id]
    assert reference_record.seq == ref_seq.seq, "Reference sequence dropped from alignment"
    for name in existing_aln:
        assert name in aligned, "Some existing alignment sequences dropped unexpectedly"
        expected = existing_aln[name]
        assert aligned[name].seq == expected.seq, "Some existing alignment sequences changed unexpectedly"

    # Test sequence contents: same names, same sequences.
    assert os.path.isfile(sequences_path), "Didn't write test sequences where it said"
    written = SeqIO.to_dict(SeqIO.parse(sequences_path, "fasta"))
    for name in test_seqs:
        assert name in written, "Some test sequences unexpectedly dropped"
        expected = test_seqs[name]
        assert written[name].seq == expected.seq, "Some test sequences changed unexpectedly"
    assert written.keys() == test_seqs.keys()
def test_prepare_no_alignment_or_ref(self, test_file, test_seqs, out_file):
    """With neither an alignment nor a reference, written sequences match the inputs."""
    sequence_files = [test_file]
    _, sequences_path, _ = align.prepare(sequence_files, None, out_file, None, None)
    assert os.path.isfile(sequences_path), "Didn't write sequences where it said"

    # Every record that was written must carry the same sequence as the
    # test record of the same name.
    written = SeqIO.to_dict(SeqIO.parse(sequences_path, "fasta"))
    for name in written:
        assert written[name].seq == test_seqs[name].seq
def test_prepare_no_alignment_with_ref_name(self, test_with_ref, test_seqs, ref_seq, out_file):
    """With a named reference and no alignment, the output keeps the reference and all test sequences."""
    sequence_files = [test_with_ref]
    reference_name = ref_seq.id
    _, sequences_path, _ = align.prepare(sequence_files, None, out_file, reference_name, None)
    assert os.path.isfile(sequences_path), "Didn't write sequences where it said"

    written = SeqIO.to_dict(SeqIO.parse(sequences_path, "fasta"))
    reference_record = written[ref_seq.id]
    assert reference_record.seq == ref_seq.seq, "Reference sequence was not added to test sequences"

    # Each test sequence must be present and identical in the output.
    for name in test_seqs:
        assert name in written, "Some test sequences dropped unexpectedly"
        expected = test_seqs[name]
        assert written[name].seq == expected.seq, "Some test sequences changed unexpectedly"
def test_prepare_with_alignment_with_duplicate_sequences(
        self, test_file, test_seqs, existing_file, existing_aln, out_file):
    """Input sequences that already appear in the alignment are stripped from the output."""
    # Feed the alignment file in as an input too, so every one of its records
    # duplicates something in the existing alignment.
    sequence_files = [existing_file, test_file]
    prepared = align.prepare(sequence_files, existing_file, out_file, None, None)
    _, sequences_path, _ = prepared

    written = SeqIO.to_dict(SeqIO.parse(sequences_path, "fasta"))
    written_names = written.keys()
    assert written_names == test_seqs.keys(), "Did not strip duplicate sequences from test input!"
def test_prepare_no_alignment_multiple_test_seqs(self, test_file, test_seqs, ref_file,
                                                 ref_seq, out_file):
    """Several sequence files passed to prepare() come back as one unified file."""
    # bit of a kludge: passing the reference file as a second input gets the
    # reference strain into the input files
    sequence_files = [test_file, ref_file]
    prepared = align.prepare(sequence_files, None, out_file, ref_seq.id, None)
    _, sequences_path, _ = prepared

    merged = SeqIO.to_dict(SeqIO.parse(sequences_path, "fasta"))
    expected_names = set(test_seqs.keys()) | {ref_seq.id}
    assert merged.keys() == expected_names, "Did not combine the two files"
    assert merged[ref_seq.id].seq == ref_seq.seq, "Missing sequence from second file"

    for name in test_seqs:
        assert name in merged, "Some test sequences unexpectedly dropped"
        expected = test_seqs[name]
        assert merged[name].seq == expected.seq, "Some test sequences changed unexpectedly"
def test_prepare_no_alignment_with_ref_file(self, test_file, test_seqs, ref_file,
                                            ref_seq, out_file):
    """With a reference file and no alignment, the reference is written first, then the test sequences."""
    sequence_files = [test_file]
    prepared = align.prepare(sequence_files, None, out_file, None, ref_file)
    _, sequences_path, reference_name = prepared

    assert reference_name == ref_seq.id, "Didn't return strain name from refrence file"
    assert os.path.isfile(sequences_path), "Didn't write sequences where it said"

    # Keep the records as a list: their order in the file is part of the contract.
    records = list(SeqIO.parse(sequences_path, "fasta"))
    first_record = records[0]
    assert first_record.id == ref_seq.id, "Reference sequence is not the first sequence in ouput file!"

    written_names = set()
    for record in records:
        written_names.add(record.name)
    missing = [name for name in test_seqs if name not in written_names]
    assert not missing, "Some test sequences dropped unexpectedly"

    # Everything after the leading reference must match its test record.
    remaining = records[1:]
    for record in remaining:
        assert record.seq == test_seqs[record.id].seq, "Some test sequences changed unexpectedly"