def test_heidelberg_fasta_ac(subtype_heidelberg_pass):
    """Subtype the Heidelberg FASTA inputs and compare with the expected fixture.

    ``subtype_contigs`` is run once on ``fasta_heidelberg_pass`` and once on
    ``fasta_gz_heidelberg_pass`` (presumably a gzipped copy, going by the
    name). Both results are type-checked, then passed to the
    ``check_subtype_attrs`` and ``check_df_fasta_cols`` helpers.
    """
    shared_kwargs = dict(genome_name=genome_name, scheme=scheme_heidelberg)
    plain_subtype, plain_df = subtype_contigs(
        fasta_path=fasta_heidelberg_pass, **shared_kwargs)
    gz_subtype, gz_df = subtype_contigs(
        fasta_path=fasta_gz_heidelberg_pass, **shared_kwargs)

    for subtype, frame in ((plain_subtype, plain_df), (gz_subtype, gz_df)):
        assert isinstance(subtype, Subtype)
        assert isinstance(frame, DataFrame)

    check_subtype_attrs(plain_subtype, gz_subtype, subtype_heidelberg_pass)
    for frame in (plain_df, gz_df):
        check_df_fasta_cols(frame)
def test_ac_vs_bad_contigs(subtype_enteritidis_fail):
    """Subtype the failing Enteritidis FASTA inputs and compare with the fixture.

    ``subtype_contigs`` is run on ``fasta_enteritidis_fail`` and on
    ``fasta_gz_enteritidis_fail`` (presumably a gzipped copy, going by the
    name) with the Enteritidis scheme. Both results are type-checked, then
    handed to ``check_subtype_attrs`` and ``check_df_fasta_cols``.
    """
    shared_kwargs = dict(genome_name=genome_name, scheme=scheme_enteritidis)
    plain_subtype, plain_df = subtype_contigs(
        fasta_path=fasta_enteritidis_fail, **shared_kwargs)
    gz_subtype, gz_df = subtype_contigs(
        fasta_path=fasta_gz_enteritidis_fail, **shared_kwargs)

    for subtype, frame in ((plain_subtype, plain_df), (gz_subtype, gz_df)):
        assert isinstance(subtype, Subtype)
        assert isinstance(frame, DataFrame)

    check_subtype_attrs(plain_subtype, gz_subtype, subtype_enteritidis_fail)
    for frame in (plain_df, gz_df):
        check_df_fasta_cols(frame)
def test_typhimurium_scheme(subtype_typhimurium_pass):
    """Subtype ``fasta_typhimurium_pass`` with the Typhimurium scheme.

    The returned pair is type-checked and the subtype is compared with the
    ``subtype_typhimurium_pass`` fixture through ``check_subtype_attrs``.
    """
    result = subtype_contigs(
        fasta_path=fasta_typhimurium_pass,
        genome_name=genome_name,
        scheme=scheme_typhimurium,
    )
    subtype, frame = result
    assert isinstance(subtype, Subtype)
    assert isinstance(frame, DataFrame)
    # NOTE(review): other tests in this file pass three arguments to
    # check_subtype_attrs; confirm the helper in scope accepts this
    # two-argument form.
    check_subtype_attrs(subtype, subtype_typhimurium_pass)
def test_tuberculosis_scheme(subtype_tb_AP018036_pass):
    """Subtype ``fasta_tb_pass`` with the tuberculosis scheme.

    The returned pair is type-checked and the subtype is compared with the
    ``subtype_tb_AP018036_pass`` fixture through ``check_subtype_attrs``.
    """
    result = subtype_contigs(
        fasta_path=fasta_tb_pass,
        genome_name=genome_name,
        scheme=scheme_tuberculosis,
    )
    subtype, frame = result
    assert isinstance(subtype, Subtype)
    assert isinstance(frame, DataFrame)
    # NOTE(review): other tests in this file pass three arguments to
    # check_subtype_attrs; confirm the helper in scope accepts this
    # two-argument form.
    check_subtype_attrs(subtype, subtype_tb_AP018036_pass)
def test_too_many_kmers():
    """Check that ``subtype_contigs`` exits for the ``too_many_kmers`` scheme.

    The call is expected to raise ``SystemExit`` when given the scheme file
    ``tests/data/too_many_kmers.fasta``.
    """
    scheme = 'tests/data/too_many_kmers.fasta'
    fasta = 'tests/data/fail-qc-missing-levels.fasta'
    # No return value is compared: when the call raises there is nothing to
    # compare, and when it does not raise ``pytest.raises`` already fails the
    # test. An ``assert ... == SystemExit`` here would be dead code.
    with pytest.raises(SystemExit):
        subtype_contigs(fasta_path=fasta, genome_name=genome_name, scheme=scheme)
def test_enteritidis_scheme_vs_qc_failing_contigs_unconfident_ac(
        subtype_enteritidis_fail_unconfident):
    """Subtype the unconfident Enteritidis FASTA inputs and check the QC message.

    ``subtype_contigs`` is run on ``fasta_enteritidis_unconfident`` and on
    ``fasta_gz_enteritidis_unconfident`` (presumably a gzipped copy, going by
    the name). Both results are type-checked and compared with the fixture,
    both QC messages must mention 'Unconfident Results Error 4', and both
    frames go through ``check_df_fasta_cols``.
    """
    shared_kwargs = dict(genome_name=genome_name, scheme=scheme_enteritidis)
    plain_subtype, plain_df = subtype_contigs(
        fasta_path=fasta_enteritidis_unconfident, **shared_kwargs)
    gz_subtype, gz_df = subtype_contigs(
        fasta_path=fasta_gz_enteritidis_unconfident, **shared_kwargs)

    for subtype, frame in ((plain_subtype, plain_df), (gz_subtype, gz_df)):
        assert isinstance(subtype, Subtype)
        assert isinstance(frame, DataFrame)

    check_subtype_attrs(
        plain_subtype, gz_subtype, subtype_enteritidis_fail_unconfident)

    for subtype in (plain_subtype, gz_subtype):
        assert 'Unconfident Results Error 4' in subtype.qc_message

    for frame in (plain_df, gz_df):
        check_df_fasta_cols(frame)
def test_heidelberg_scheme_and_lowcase_seq_inputs(subtype_heidelberg_SRR1002850_pass):
    """Subtype ``fasta_heidelberg_pass`` with the Heidelberg scheme.

    The returned pair is type-checked and the subtype is compared with the
    ``subtype_heidelberg_SRR1002850_pass`` fixture through
    ``check_subtype_attrs``.
    """
    result = subtype_contigs(
        fasta_path=fasta_heidelberg_pass,
        genome_name=genome_name,
        scheme=scheme_heidelberg,
    )
    subtype, frame = result
    assert isinstance(subtype, Subtype)
    assert isinstance(frame, DataFrame)
    # NOTE(review): other tests in this file pass three arguments to
    # check_subtype_attrs; confirm the helper in scope accepts this
    # two-argument form.
    check_subtype_attrs(subtype, subtype_heidelberg_SRR1002850_pass)
def test_missing_hierarchy_levels_in_subtype():
    """Expect a QC failure that reports missing nested hierarchy levels.

    Subtypes ``tests/data/fail-qc-missing-levels.fasta`` with the
    'heidelberg' scheme and checks that the result has QC status ``QC.FAIL``
    and a QC message containing the unconfident-results error and the
    subtypes "2.1" and "2.1.1".
    """
    scheme_name = 'heidelberg'
    fasta_file = 'tests/data/fail-qc-missing-levels.fasta'

    subtype, frame = subtype_contigs(
        fasta_path=fasta_file, genome_name=genome_name, scheme=scheme_name)

    assert isinstance(subtype, Subtype)
    assert isinstance(frame, pd.DataFrame)
    assert subtype.scheme == scheme_name
    assert subtype.qc_status == QC.FAIL

    expected_fragments = (
        QC.UNCONFIDENT_RESULTS_ERROR_4,
        "kmers for nested hierarchical subtype(s)",
        "2.1",
        "2.1.1",
    )
    for fragment in expected_fragments:
        assert fragment in subtype.qc_message
def test_unconfident_subtype():
    """Expect a QC failure that reports missing downstream subtype kmers.

    Subtypes ``tests/data/fail-qc-unconfident-subtype.fasta`` with the
    'enteritidis' scheme and checks that the result has QC status
    ``QC.FAIL`` and a QC message containing the unconfident-results error and
    the quoted subtypes '2.1.5.4.2' and '2.1.5.4.1'.
    """
    scheme_name = 'enteritidis'
    fasta_file = 'tests/data/fail-qc-unconfident-subtype.fasta'

    subtype, frame = subtype_contigs(
        fasta_path=fasta_file, genome_name=genome_name, scheme=scheme_name)

    assert isinstance(subtype, Subtype)
    assert isinstance(frame, pd.DataFrame)
    assert subtype.scheme == scheme_name
    assert subtype.qc_status == QC.FAIL

    expected_fragments = (
        QC.UNCONFIDENT_RESULTS_ERROR_4,
        "kmers for downstream subtype(s)",
        "'2.1.5.4.2'",
        "'2.1.5.4.1'",
    )
    for fragment in expected_fragments:
        assert fragment in subtype.qc_message
def test_mixed_subtype_positive_negative_kmers_same_target():
    """Expect a mixed-subtype QC failure listing the conflicting target sites.

    Subtypes ``tests/data/fail-qc-mixed-subtype-pos-neg-kmers.fasta`` with
    the 'heidelberg' scheme and checks that the result has QC status
    ``QC.FAIL`` and that the expected message is contained in the QC message.
    """
    scheme_name = 'heidelberg'
    fasta_file = 'tests/data/fail-qc-mixed-subtype-pos-neg-kmers.fasta'
    # The site list appears twice in the expected message; it is kept
    # verbatim because the assertion is a substring match.
    expected_qc_msg = (
        'FAIL: Mixed subtype; the positive and negative kmers were found for the same '
        'target sites 202001, 600783, 1049933, 1193219, 2778621, 2904061, '
        '3278067, 3867228, 4499501, 4579224, 4738855, 202001, '
        '600783, 1049933, 1193219, 2778621, 2904061, 3278067, '
        '3867228, 4499501, 4579224, 4738855 for subtype "1.1".')

    subtype, frame = subtype_contigs(
        fasta_path=fasta_file, genome_name=genome_name, scheme=scheme_name)

    assert isinstance(subtype, Subtype)
    assert isinstance(frame, pd.DataFrame)
    assert subtype.scheme == scheme_name
    assert subtype.qc_status == QC.FAIL

    # Print actual and expected messages for the captured test output.
    print(subtype.qc_message)
    print(expected_qc_msg)
    assert expected_qc_msg in subtype.qc_message