def test_concat_ref(self):
    """Check that the concat ref file is written only when asked for.

    The mutator is run twice on the same random 1000-base fasta: first
    with ``concat_ref_file`` set to None (the file must be absent
    afterwards), then with it set to a path in the temp directory (the
    file must be present afterwards).
    """
    tmp_dir = TempDirectory()
    fasta_path, _ = write_random_dna_fasta(
        tmp_dir.path, "original.fasta", 1000)
    concat_path = os.path.join(tmp_dir.path, "concat_ref.fasta")

    args = make_default_args(fasta_path)
    args.random_seed = 1
    args.num_sims = 1

    # Pass 1: option not set, nothing should be created.
    args.concat_ref_file = None
    snpmutator.run_from_args(args)
    self.assertFalse(
        os.path.exists(concat_path),
        "The concat ref file should not exist when not explicitly requested")

    # Pass 2: option set, the file must now exist.
    args.concat_ref_file = concat_path
    snpmutator.run_from_args(args)
    self.assertTrue(
        os.path.exists(concat_path),
        "The concat ref file is missing when requested.")
def test_not_all_same_pool_mono(self):
    """Check that replicates differ when pooling with ``mono`` enabled.

    Three simulations are generated with ``subset_len = 500`` and
    ``mono = True``; the three resulting sequences must be pairwise
    different.
    """
    tmp_dir = TempDirectory()
    fasta_path, _ = write_random_dna_fasta(
        tmp_dir.path, "original.fasta", 1000)

    args = make_default_args(fasta_path)
    args.random_seed = 1
    args.num_sims = 3
    args.num_subs = 2
    args.num_insertions = 2
    args.num_deletions = 2
    args.subset_len = 500
    args.mono = True
    snpmutator.run_from_args(args)

    # Read all three replicates before making any comparison.
    record1 = read_fasta_seq_record("original_mutated_1.fasta")
    record2 = read_fasta_seq_record("original_mutated_2.fasta")
    record3 = read_fasta_seq_record("original_mutated_3.fasta")
    seq1, seq2, seq3 = str(record1.seq), str(record2.seq), str(record3.seq)

    self.assertNotEqual(
        seq1, seq2, "Generated sequences 1 and 2 should be different.")
    self.assertNotEqual(
        seq2, seq3, "Generated sequences 2 and 3 should be different.")
    self.assertNotEqual(
        seq1, seq3, "Generated sequences 1 and 3 should be different.")
def test_summary_creation(self):
    """Check that the summary file is written only when asked for.

    The mutator is run once with ``summary_file`` set to None and once
    with it set to a path in the temp directory; the file must be absent
    after the first run and present after the second.
    """
    tmp_dir = TempDirectory()
    fasta_path, _ = write_random_dna_fasta(
        tmp_dir.path, "original.fasta", 1000)
    summary_path = os.path.join(tmp_dir.path, "original_snpListMutated.txt")

    args = make_default_args(fasta_path)
    args.random_seed = 1
    args.num_sims = 1

    # Pass 1: no summary requested.
    args.summary_file = None
    snpmutator.run_from_args(args)
    self.assertFalse(
        os.path.exists(summary_path),
        "The summary file should not exist when not explicitly requested")

    # Pass 2: summary requested.
    args.summary_file = summary_path
    snpmutator.run_from_args(args)
    self.assertTrue(
        os.path.exists(summary_path),
        "The summary file is missing when requested.")
def test_seqid_override(self):
    """Check that the defline sequence id is replaced only on request.

    With ``seq_id`` set to None the mutated record must keep the id of
    the original record; with ``seq_id`` set to a string the mutated
    record's id must equal that string.
    """
    tmp_dir = TempDirectory()
    fasta_path, _ = write_random_dna_fasta(
        tmp_dir.path, "original.fasta", 50)

    args = make_default_args(fasta_path)
    args.random_seed = 1
    args.num_sims = 1
    args.num_subs = 3

    source_record = read_fasta_seq_record(fasta_path)

    # No override: the id must be carried over unchanged.
    args.seq_id = None
    snpmutator.run_from_args(args)
    mutated_record = read_fasta_seq_record("original_mutated_1.fasta")
    self.assertEqual(
        source_record.id, mutated_record.id,
        "Defline seq id should not change when not requested.")

    # Override: the id must match the requested value.
    args.seq_id = "test_override_seqid"
    snpmutator.run_from_args(args)
    mutated_record = read_fasta_seq_record("original_mutated_1.fasta")
    failure_text = (
        'Overridden defline seq id "%s" does not match expected value "%s"'
        % (mutated_record.id, args.seq_id))
    self.assertEqual(mutated_record.id, args.seq_id, failure_text)
def test_zero_changes(self):
    """Check that the output equals the input when nothing is mutated.

    No substitution, insertion or deletion counts are set on top of the
    default arguments, so the single generated fasta file is expected to
    compare equal to the original.
    """
    tmp_dir = TempDirectory()
    fasta_path, _ = write_random_dna_fasta(
        tmp_dir.path, "original.fasta", 1000)

    args = make_default_args(fasta_path)
    args.random_seed = 1
    args.num_sims = 1
    snpmutator.run_from_args(args)

    unchanged = compare_mutated_fasta_files(
        fasta_path, "original_mutated_1.fasta")
    self.assertTrue(
        unchanged, "Generated fasta file does not match original fasta file")
def test_eligible_snp_changes(self):
    """Check substitutions on a sequence with ineligible positions.

    The input has digits on both ends around ten A/a bases. Ten
    substitutions are requested, and the expected result for
    ``random_seed = 1`` leaves the digits untouched.
    """
    tmp_dir = TempDirectory()
    dna = "12345aaaaaAAAAA12345"
    fasta_path = write_fixed_dna_fasta(dna, tmp_dir.path, "original.fasta")

    args = make_default_args(fasta_path)
    args.random_seed = 1
    args.num_sims = 1
    args.num_subs = 10
    snpmutator.run_from_args(args)

    mutated = read_fasta_seq_record("original_mutated_1.fasta")
    failure_text = ("Eligible SNP test failed, dna=%s mutated seq=%s"
                    % (dna, str(mutated.seq)))
    self.assertEqual(str(mutated.seq), "12345GGTCTCGTGC12345", failure_text)
def test_zero_changes(self):
    """Check that the output equals the input when nothing is mutated.

    No substitution, insertion or deletion counts are set on top of the
    default arguments, so the single generated fasta file is expected to
    compare equal to the original.
    """
    tmp_dir = TempDirectory()
    fasta_path, _ = write_random_dna_fasta(
        tmp_dir.path, "original.fasta", 1000)

    args = make_default_args(fasta_path)
    args.random_seed = 1
    args.num_sims = 1
    snpmutator.run_from_args(args)

    unchanged = compare_mutated_fasta_files(
        fasta_path, "original_mutated_1.fasta")
    self.assertTrue(
        unchanged, "Generated fasta file does not match original fasta file")
def test_eligible_snp_changes(self):
    """Check substitutions on a sequence with ineligible positions.

    The input has digits on both ends around ten A/a bases. Ten
    substitutions are requested, and the expected result for
    ``random_seed = 1`` leaves the digits untouched.
    """
    tmp_dir = TempDirectory()
    dna = "12345aaaaaAAAAA12345"
    fasta_path = write_fixed_dna_fasta(dna, tmp_dir.path, "original.fasta")

    args = make_default_args(fasta_path)
    args.random_seed = 1
    args.num_sims = 1
    args.num_subs = 10
    snpmutator.run_from_args(args)

    mutated = read_fasta_seq_record("original_mutated_1.fasta")
    failure_text = ("Eligible SNP test failed, dna=%s mutated seq=%s"
                    % (dna, str(mutated.seq)))
    self.assertEqual(str(mutated.seq), "12345GGTCTCGTGC12345", failure_text)
def test_mono_mix_no_del(self):
    """Check that monomorphic mutations are identical across replicates.

    Two simulations are generated with ``mono = True``, three
    substitutions, three insertions and no deletions; both output
    sequences must be equal.
    """
    tmp_dir = TempDirectory()
    dna = "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
    fasta_path = write_fixed_dna_fasta(dna, tmp_dir.path, "original.fasta")

    args = make_default_args(fasta_path)
    args.random_seed = 1
    args.subset_len = 6
    args.mono = True
    args.num_sims = 2
    args.num_subs = 3
    args.num_deletions = 0
    args.num_insertions = 3
    snpmutator.run_from_args(args)

    record1 = read_fasta_seq_record("original_mutated_1.fasta")
    record2 = read_fasta_seq_record("original_mutated_2.fasta")
    failure_text = (
        "Monomorphic mix of mutations do not match, mutated seq 1=%s mutated seq 2=%s"
        % (str(record1.seq), str(record2.seq)))
    self.assertEqual(str(record1.seq), str(record2.seq), failure_text)
def test_not_all_same(self):
    """Check that multiple generated replicates are not all alike.

    Three simulations are generated with two substitutions, two
    insertions and two deletions each; the three resulting sequences
    must be pairwise different.
    """
    tmp_dir = TempDirectory()
    fasta_path, _ = write_random_dna_fasta(
        tmp_dir.path, "original.fasta", 1000)

    args = make_default_args(fasta_path)
    args.random_seed = 1
    args.num_sims = 3
    args.num_subs = 2
    args.num_insertions = 2
    args.num_deletions = 2
    snpmutator.run_from_args(args)

    # Read all three replicates before making any comparison.
    record1 = read_fasta_seq_record("original_mutated_1.fasta")
    record2 = read_fasta_seq_record("original_mutated_2.fasta")
    record3 = read_fasta_seq_record("original_mutated_3.fasta")
    seq1, seq2, seq3 = str(record1.seq), str(record2.seq), str(record3.seq)

    self.assertNotEqual(
        seq1, seq2, "Generated sequences 1 and 2 should be different.")
    self.assertNotEqual(
        seq2, seq3, "Generated sequences 2 and 3 should be different.")
    self.assertNotEqual(
        seq1, seq3, "Generated sequences 1 and 3 should be different.")
def test_vcf_creation(self):
    """Check that the VCF file is written only when asked for.

    The mutator is run once with ``vcf_file`` set to None and once with
    it set to a path in the temp directory; the file must be absent
    after the first run and present after the second.
    """
    tmp_dir = TempDirectory()
    fasta_path, _ = write_random_dna_fasta(
        tmp_dir.path, "original.fasta", 1000)
    vcf_path = os.path.join(tmp_dir.path, "test.vcf")

    args = make_default_args(fasta_path)
    args.random_seed = 1
    args.num_sims = 1

    # Pass 1: no VCF requested.
    args.vcf_file = None
    snpmutator.run_from_args(args)
    self.assertFalse(
        os.path.exists(vcf_path),
        "The VCF file should not exist when not explicitly requested")

    # Pass 2: VCF requested.
    args.vcf_file = vcf_path
    snpmutator.run_from_args(args)
    self.assertTrue(
        os.path.exists(vcf_path),
        "The VCF file is missing when requested.")
def test_mutate_mix_changes(self):
    """Check combined substitutions, insertions and deletions.

    The same ten-base input is mutated three times with increasing
    counts, and each result is compared with the sequence expected for
    ``random_seed = 1``.
    """
    tmp_dir = TempDirectory()
    dna = "GGGGGGGGGG"
    fasta_path = write_fixed_dna_fasta(dna, tmp_dir.path, "original.fasta")

    args = make_default_args(fasta_path)
    args.random_seed = 1
    args.num_sims = 1

    # (subs, insertions, deletions, expected sequence, failure template)
    scenarios = (
        (1, 1, 1, "GGTGGGGAGG",
         "Mutate mix 1,1,1 test failed, dna=%s mutated seq=%s"),
        (2, 2, 2, "GTGTGCGGGC",
         "Mutate mix 2,2,2 test failed, dna=%s mutated seq=%s"),
        (3, 4, 3, "GCTGTAGTGAC",
         "Mutate mix 3,4,3 test failed, dna=%s mutated seq=%s"),
    )
    for subs, insertions, deletions, expected, template in scenarios:
        args.num_subs = subs
        args.num_insertions = insertions
        args.num_deletions = deletions
        snpmutator.run_from_args(args)
        mutated = read_fasta_seq_record("original_mutated_1.fasta")
        self.assertEqual(
            str(mutated.seq), expected, template % (dna, str(mutated.seq)))
def test_delete_changes(self):
    """Check the result of 1, 5 and 10 deletions on a ten-base input.

    Each run reuses the same arguments with only ``num_deletions``
    changed; the expected sequences are those for ``random_seed = 1``.
    """
    tmp_dir = TempDirectory()
    dna = "GCCAAATCGG"
    fasta_path = write_fixed_dna_fasta(dna, tmp_dir.path, "original.fasta")

    args = make_default_args(fasta_path)
    args.random_seed = 1
    args.num_sims = 1

    # (deletion count, expected sequence, failure template)
    scenarios = (
        (1, "GCAAATCGG", "Delete 1 test failed, dna=%s mutated seq=%s"),
        (5, "CAACG", "Delete 5 test failed, dna=%s mutated seq=%s"),
        (10, "", "Delete 10 test failed, dna=%s mutated seq=%s"),
    )
    for deletions, expected, template in scenarios:
        args.num_deletions = deletions
        snpmutator.run_from_args(args)
        mutated = read_fasta_seq_record("original_mutated_1.fasta")
        self.assertEqual(
            str(mutated.seq), expected, template % (dna, str(mutated.seq)))
def test_insert_changes(self):
    """Check the result of 1, 5 and 10 insertions on a ten-base input.

    Each run reuses the same arguments with only ``num_insertions``
    changed; the expected sequences are those for ``random_seed = 1``.
    """
    tmp_dir = TempDirectory()
    dna = "TTTTAATTTT"
    fasta_path = write_fixed_dna_fasta(dna, tmp_dir.path, "original.fasta")

    args = make_default_args(fasta_path)
    args.random_seed = 1
    args.num_sims = 1

    # (insertion count, expected sequence, failure template)
    scenarios = (
        (1, "TTTGTAATTTT", "Insert 1 test failed, dna=%s mutated seq=%s"),
        (5, "TCTTGTATATATTTC", "Insert 5 test failed, dna=%s mutated seq=%s"),
        (10, "TCTATGTTATATTATTTCTC",
         "Insert 10 test failed, dna=%s mutated seq=%s"),
    )
    for insertions, expected, template in scenarios:
        args.num_insertions = insertions
        snpmutator.run_from_args(args)
        mutated = read_fasta_seq_record("original_mutated_1.fasta")
        self.assertEqual(
            str(mutated.seq), expected, template % (dna, str(mutated.seq)))
def test_mono_mix_no_del(self):
    """Check that monomorphic mutations are identical across replicates.

    Two simulations are generated with ``mono = True``, three
    substitutions, three insertions and no deletions; both output
    sequences must be equal.
    """
    tmp_dir = TempDirectory()
    dna = "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
    fasta_path = write_fixed_dna_fasta(dna, tmp_dir.path, "original.fasta")

    args = make_default_args(fasta_path)
    args.random_seed = 1
    args.subset_len = 6
    args.mono = True
    args.num_sims = 2
    args.num_subs = 3
    args.num_deletions = 0
    args.num_insertions = 3
    snpmutator.run_from_args(args)

    record1 = read_fasta_seq_record("original_mutated_1.fasta")
    record2 = read_fasta_seq_record("original_mutated_2.fasta")
    failure_text = (
        "Monomorphic mix of mutations do not match, mutated seq 1=%s mutated seq 2=%s"
        % (str(record1.seq), str(record2.seq)))
    self.assertEqual(str(record1.seq), str(record2.seq), failure_text)
def test_pooling(self):
    """Verify that pooling places mutations at the same location in all replicates.

    With ``subset_len = 1`` and two simulations, the mutator is run three
    times on a ten-base input: once with a single substitution, once with
    a single insertion and once with a single deletion. After each run
    both replicates are compared with the sequences expected for
    ``random_seed = 1``.
    """
    directory = TempDirectory()
    dna = "AAAAAAAAAA"
    original_file_path = write_fixed_dna_fasta(dna, directory.path, "original.fasta")

    args = make_default_args(original_file_path)
    args.random_seed = 1
    args.num_sims = 2
    args.subset_len = 1

    # Each scenario: (subs, insertions, deletions),
    #                (expected replicate 1, failure template 1),
    #                (expected replicate 2, failure template 2)
    scenarios = (
        ((1, 0, 0),
         ('AATAAAAAAA', "Pooling SNP replicate 1 test failed, dna=%s mutated seq1=%s"),
         ('AACAAAAAAA', "Pooling SNP replicate 2 test failed, dna=%s mutated seq2=%s")),
        ((0, 1, 0),
         ('AAAGAAAAAAA', "Pooling INS replicate 1 test failed, dna=%s mutated seq=%s"),
         ('AAACAAAAAAA', "Pooling INS replicate 2 test failed, dna=%s mutated seq=%s")),
        ((0, 0, 1),
         ('AAAAAAAAA', "Pooling DEL replicate 1 test failed, dna=%s mutated seq=%s"),
         ('AAAAAAAAA', "Pooling DEL replicate 2 test failed, dna=%s mutated seq=%s")),
    )
    for (subs, insertions, deletions), first, second in scenarios:
        args.num_subs = subs
        args.num_insertions = insertions
        args.num_deletions = deletions
        snpmutator.run_from_args(args)

        mutated_seq_record1 = read_fasta_seq_record("original_mutated_1.fasta")
        mutated_seq_record2 = read_fasta_seq_record("original_mutated_2.fasta")
        seq1 = str(mutated_seq_record1.seq)
        seq2 = str(mutated_seq_record2.seq)

        # Every failure message reports the replicate it is about. The
        # DEL replicate 2 message previously printed replicate 1's
        # sequence, which would have hidden the actual mismatch.
        self.assertEqual(seq1, first[0], first[1] % (dna, seq1))
        self.assertEqual(seq2, second[0], second[1] % (dna, seq2))
def test_mutate_mix_changes(self):
    """Check combined substitutions, insertions and deletions.

    The same ten-base input is mutated three times with increasing
    counts, and each result is compared with the sequence expected for
    ``random_seed = 1``.
    """
    tmp_dir = TempDirectory()
    dna = "GGGGGGGGGG"
    fasta_path = write_fixed_dna_fasta(dna, tmp_dir.path, "original.fasta")

    args = make_default_args(fasta_path)
    args.random_seed = 1
    args.num_sims = 1

    # (subs, insertions, deletions, expected sequence, failure template)
    scenarios = (
        (1, 1, 1, "GGTGGGGAGG",
         "Mutate mix 1,1,1 test failed, dna=%s mutated seq=%s"),
        (2, 2, 2, "GTGTGCGGGC",
         "Mutate mix 2,2,2 test failed, dna=%s mutated seq=%s"),
        (3, 4, 3, "GCTGTAGTGAC",
         "Mutate mix 3,4,3 test failed, dna=%s mutated seq=%s"),
    )
    for subs, insertions, deletions, expected, template in scenarios:
        args.num_subs = subs
        args.num_insertions = insertions
        args.num_deletions = deletions
        snpmutator.run_from_args(args)
        mutated = read_fasta_seq_record("original_mutated_1.fasta")
        self.assertEqual(
            str(mutated.seq), expected, template % (dna, str(mutated.seq)))
def test_pooling(self):
    """Verify that pooling places mutations at the same location in all replicates.

    With ``subset_len = 1`` and two simulations, the mutator is run three
    times on a ten-base input: once with a single substitution, once with
    a single insertion and once with a single deletion. After each run
    both replicates are compared with the sequences expected for
    ``random_seed = 1``.
    """
    directory = TempDirectory()
    dna = "AAAAAAAAAA"
    original_file_path = write_fixed_dna_fasta(dna, directory.path, "original.fasta")

    args = make_default_args(original_file_path)
    args.random_seed = 1
    args.num_sims = 2
    args.subset_len = 1

    # Each scenario: (subs, insertions, deletions),
    #                (expected replicate 1, failure template 1),
    #                (expected replicate 2, failure template 2)
    scenarios = (
        ((1, 0, 0),
         ('AATAAAAAAA', "Pooling SNP replicate 1 test failed, dna=%s mutated seq1=%s"),
         ('AACAAAAAAA', "Pooling SNP replicate 2 test failed, dna=%s mutated seq2=%s")),
        ((0, 1, 0),
         ('AAAGAAAAAAA', "Pooling INS replicate 1 test failed, dna=%s mutated seq=%s"),
         ('AAACAAAAAAA', "Pooling INS replicate 2 test failed, dna=%s mutated seq=%s")),
        ((0, 0, 1),
         ('AAAAAAAAA', "Pooling DEL replicate 1 test failed, dna=%s mutated seq=%s"),
         ('AAAAAAAAA', "Pooling DEL replicate 2 test failed, dna=%s mutated seq=%s")),
    )
    for (subs, insertions, deletions), first, second in scenarios:
        args.num_subs = subs
        args.num_insertions = insertions
        args.num_deletions = deletions
        snpmutator.run_from_args(args)

        mutated_seq_record1 = read_fasta_seq_record("original_mutated_1.fasta")
        mutated_seq_record2 = read_fasta_seq_record("original_mutated_2.fasta")
        seq1 = str(mutated_seq_record1.seq)
        seq2 = str(mutated_seq_record2.seq)

        # Every failure message reports the replicate it is about. The
        # DEL replicate 2 message previously printed replicate 1's
        # sequence, which would have hidden the actual mismatch.
        self.assertEqual(seq1, first[0], first[1] % (dna, seq1))
        self.assertEqual(seq2, second[0], second[1] % (dna, seq2))
def test_insert_changes(self):
    """Check the result of 1, 5 and 10 insertions on a ten-base input.

    Each run reuses the same arguments with only ``num_insertions``
    changed; the expected sequences are those for ``random_seed = 1``.
    """
    tmp_dir = TempDirectory()
    dna = "TTTTAATTTT"
    fasta_path = write_fixed_dna_fasta(dna, tmp_dir.path, "original.fasta")

    args = make_default_args(fasta_path)
    args.random_seed = 1
    args.num_sims = 1

    # (insertion count, expected sequence, failure template)
    scenarios = (
        (1, "TTTGTAATTTT", "Insert 1 test failed, dna=%s mutated seq=%s"),
        (5, "TCTTGTATATATTTC", "Insert 5 test failed, dna=%s mutated seq=%s"),
        (10, "TCTATGTTATATTATTTCTC",
         "Insert 10 test failed, dna=%s mutated seq=%s"),
    )
    for insertions, expected, template in scenarios:
        args.num_insertions = insertions
        snpmutator.run_from_args(args)
        mutated = read_fasta_seq_record("original_mutated_1.fasta")
        self.assertEqual(
            str(mutated.seq), expected, template % (dna, str(mutated.seq)))
def test_delete_changes(self):
    """Check the result of 1, 5 and 10 deletions on a ten-base input.

    Each run reuses the same arguments with only ``num_deletions``
    changed; the expected sequences are those for ``random_seed = 1``.
    """
    tmp_dir = TempDirectory()
    dna = "GCCAAATCGG"
    fasta_path = write_fixed_dna_fasta(dna, tmp_dir.path, "original.fasta")

    args = make_default_args(fasta_path)
    args.random_seed = 1
    args.num_sims = 1

    # (deletion count, expected sequence, failure template)
    scenarios = (
        (1, "GCAAATCGG", "Delete 1 test failed, dna=%s mutated seq=%s"),
        (5, "CAACG", "Delete 5 test failed, dna=%s mutated seq=%s"),
        (10, "", "Delete 10 test failed, dna=%s mutated seq=%s"),
    )
    for deletions, expected, template in scenarios:
        args.num_deletions = deletions
        snpmutator.run_from_args(args)
        mutated = read_fasta_seq_record("original_mutated_1.fasta")
        self.assertEqual(
            str(mutated.seq), expected, template % (dna, str(mutated.seq)))
def _run_snp_mutator(**kwargs):
    """Build mutator arguments from ``kwargs`` and run the mutator.

    The keyword arguments are forwarded to ``_generate_snp_mutator_args``
    together with ``tmp_path``, and the resulting object is handed to
    ``run_from_args``, whose return value is returned unchanged.

    NOTE(review): ``tmp_path`` is not defined in this block; it is read
    from an enclosing scope -- presumably a pytest fixture value, confirm
    against the surrounding code.
    """
    mutator_args = _generate_snp_mutator_args(tmp_path, **kwargs)
    return run_from_args(mutator_args)