def test_heidelberg_fasta_ac(subtype_heidelberg_pass):
    """Subtype ``fasta_heidelberg_pass`` and ``fasta_gz_heidelberg_pass``.

    Both inputs are run through ``subtype_contigs`` with the Heidelberg
    scheme. The returned objects are type-checked, the two subtype results
    are passed to ``check_subtype_attrs`` together with the
    ``subtype_heidelberg_pass`` fixture, and both result tables are passed to
    ``check_df_fasta_cols``.
    """
    plain_subtype, plain_df = subtype_contigs(
        fasta_path=fasta_heidelberg_pass,
        genome_name=genome_name,
        scheme=scheme_heidelberg,
    )
    gz_subtype, gz_df = subtype_contigs(
        fasta_path=fasta_gz_heidelberg_pass,
        genome_name=genome_name,
        scheme=scheme_heidelberg,
    )

    # Same checking order as before: subtype then table, plain then gz.
    typed_results = (
        (plain_subtype, Subtype),
        (plain_df, DataFrame),
        (gz_subtype, Subtype),
        (gz_df, DataFrame),
    )
    for result, expected_type in typed_results:
        assert isinstance(result, expected_type)

    check_subtype_attrs(plain_subtype, gz_subtype, subtype_heidelberg_pass)
    for result_table in (plain_df, gz_df):
        check_df_fasta_cols(result_table)
def test_ac_vs_bad_contigs(subtype_enteritidis_fail):
    """Subtype ``fasta_enteritidis_fail`` and ``fasta_gz_enteritidis_fail``.

    Both inputs are run through ``subtype_contigs`` with the Enteritidis
    scheme. The returned objects are type-checked, the two subtype results
    are passed to ``check_subtype_attrs`` together with the
    ``subtype_enteritidis_fail`` fixture, and both result tables are passed
    to ``check_df_fasta_cols``.
    """
    plain_subtype, plain_df = subtype_contigs(
        fasta_path=fasta_enteritidis_fail,
        genome_name=genome_name,
        scheme=scheme_enteritidis,
    )
    gz_subtype, gz_df = subtype_contigs(
        fasta_path=fasta_gz_enteritidis_fail,
        scheme=scheme_enteritidis,
        genome_name=genome_name,
    )

    # Same checking order as before: subtype then table, plain then gz.
    typed_results = (
        (plain_subtype, Subtype),
        (plain_df, DataFrame),
        (gz_subtype, Subtype),
        (gz_df, DataFrame),
    )
    for result, expected_type in typed_results:
        assert isinstance(result, expected_type)

    check_subtype_attrs(plain_subtype, gz_subtype, subtype_enteritidis_fail)
    for result_table in (plain_df, gz_df):
        check_df_fasta_cols(result_table)
def test_typhimurium_scheme(subtype_typhimurium_pass):
    """Subtype ``fasta_typhimurium_pass`` with the Typhimurium scheme.

    The returned pair is type-checked and the subtype result is passed to
    ``check_subtype_attrs`` together with the ``subtype_typhimurium_pass``
    fixture.
    """
    subtype_result, result_table = subtype_contigs(
        fasta_path=fasta_typhimurium_pass,
        genome_name=genome_name,
        scheme=scheme_typhimurium,
    )

    assert isinstance(subtype_result, Subtype)
    assert isinstance(result_table, DataFrame)
    check_subtype_attrs(subtype_result, subtype_typhimurium_pass)
def test_tuberculosis_scheme(subtype_tb_AP018036_pass):
    """Subtype ``fasta_tb_pass`` with the tuberculosis scheme.

    The returned pair is type-checked and the subtype result is passed to
    ``check_subtype_attrs`` together with the ``subtype_tb_AP018036_pass``
    fixture.
    """
    subtype_result, result_table = subtype_contigs(
        fasta_path=fasta_tb_pass,
        genome_name=genome_name,
        scheme=scheme_tuberculosis,
    )

    assert isinstance(subtype_result, Subtype)
    assert isinstance(result_table, DataFrame)
    check_subtype_attrs(subtype_result, subtype_tb_AP018036_pass)
# Example #5
# 0
def test_too_many_kmers():
    """Check that subtyping with the ``too_many_kmers`` scheme exits.

    ``subtype_contigs`` is expected to raise ``SystemExit`` when given the
    scheme file ``tests/data/too_many_kmers.fasta``.
    """
    scheme = 'tests/data/too_many_kmers.fasta'
    fasta = 'tests/data/fail-qc-missing-levels.fasta'
    # The call itself must raise, so there is no return value to compare.
    # ``pytest.raises`` already fails the test if nothing is raised.
    with pytest.raises(SystemExit):
        subtype_contigs(fasta_path=fasta,
                        genome_name=genome_name,
                        scheme=scheme)
def test_enteritidis_scheme_vs_qc_failing_contigs_unconfident_ac(subtype_enteritidis_fail_unconfident):
    """Subtype the ``*_enteritidis_unconfident`` FASTA inputs.

    Both ``fasta_enteritidis_unconfident`` and
    ``fasta_gz_enteritidis_unconfident`` are run through ``subtype_contigs``
    with the Enteritidis scheme. The results are type-checked, passed to
    ``check_subtype_attrs`` with the fixture, and both QC messages must
    mention ``'Unconfident Results Error 4'``. Finally both result tables are
    passed to ``check_df_fasta_cols``.
    """
    plain_subtype, plain_df = subtype_contigs(
        fasta_path=fasta_enteritidis_unconfident,
        genome_name=genome_name,
        scheme=scheme_enteritidis,
    )
    gz_subtype, gz_df = subtype_contigs(
        fasta_path=fasta_gz_enteritidis_unconfident,
        genome_name=genome_name,
        scheme=scheme_enteritidis,
    )

    # Same checking order as before: subtype then table, plain then gz.
    typed_results = (
        (plain_subtype, Subtype),
        (plain_df, DataFrame),
        (gz_subtype, Subtype),
        (gz_df, DataFrame),
    )
    for result, expected_type in typed_results:
        assert isinstance(result, expected_type)

    check_subtype_attrs(plain_subtype, gz_subtype,
                        subtype_enteritidis_fail_unconfident)

    for subtype_result in (plain_subtype, gz_subtype):
        assert 'Unconfident Results Error 4' in subtype_result.qc_message

    for result_table in (plain_df, gz_df):
        check_df_fasta_cols(result_table)
def test_heidelberg_scheme_and_lowcase_seq_inputs(
        subtype_heidelberg_SRR1002850_pass):
    """Subtype ``fasta_heidelberg_pass`` with the Heidelberg scheme.

    The returned pair is type-checked and the subtype result is passed to
    ``check_subtype_attrs`` together with the
    ``subtype_heidelberg_SRR1002850_pass`` fixture.
    """
    # NOTE(review): the test name mentions lower-case sequence input;
    # presumably ``fasta_heidelberg_pass`` points at such a file - confirm.
    subtype_result, result_table = subtype_contigs(
        fasta_path=fasta_heidelberg_pass,
        genome_name=genome_name,
        scheme=scheme_heidelberg,
    )

    assert isinstance(subtype_result, Subtype)
    assert isinstance(result_table, DataFrame)
    check_subtype_attrs(subtype_result, subtype_heidelberg_SRR1002850_pass)
# Example #8
# 0
def test_missing_hierarchy_levels_in_subtype():
    """Subtype ``fail-qc-missing-levels.fasta`` with the Heidelberg scheme.

    The result must report the scheme name, a ``QC.FAIL`` status, and a QC
    message containing ``QC.UNCONFIDENT_RESULTS_ERROR_4``, the nested
    hierarchical subtype wording, and the subtypes ``2.1`` and ``2.1.1``.
    """
    scheme_name = 'heidelberg'
    input_fasta = 'tests/data/fail-qc-missing-levels.fasta'

    subtype_result, result_table = subtype_contigs(
        fasta_path=input_fasta,
        genome_name=genome_name,
        scheme=scheme_name,
    )

    assert isinstance(subtype_result, Subtype)
    assert isinstance(result_table, pd.DataFrame)
    assert subtype_result.scheme == scheme_name
    assert subtype_result.qc_status == QC.FAIL

    # Each fragment is checked in turn against the QC message.
    required_fragments = (
        QC.UNCONFIDENT_RESULTS_ERROR_4,
        "kmers for nested hierarchical subtype(s)",
        "2.1",
        "2.1.1",
    )
    for fragment in required_fragments:
        assert fragment in subtype_result.qc_message
# Example #9
# 0
def test_unconfident_subtype():
    """Subtype ``fail-qc-unconfident-subtype.fasta`` with the Enteritidis scheme.

    The result must report the scheme name, a ``QC.FAIL`` status, and a QC
    message containing ``QC.UNCONFIDENT_RESULTS_ERROR_4``, the downstream
    subtype wording, and the quoted subtypes ``'2.1.5.4.2'`` and
    ``'2.1.5.4.1'``.
    """
    scheme_name = 'enteritidis'
    input_fasta = 'tests/data/fail-qc-unconfident-subtype.fasta'

    subtype_result, result_table = subtype_contigs(
        fasta_path=input_fasta,
        genome_name=genome_name,
        scheme=scheme_name,
    )

    assert isinstance(subtype_result, Subtype)
    assert isinstance(result_table, pd.DataFrame)
    assert subtype_result.scheme == scheme_name
    assert subtype_result.qc_status == QC.FAIL

    # Each fragment is checked in turn against the QC message.
    required_fragments = (
        QC.UNCONFIDENT_RESULTS_ERROR_4,
        "kmers for downstream subtype(s)",
        "'2.1.5.4.2'",
        "'2.1.5.4.1'",
    )
    for fragment in required_fragments:
        assert fragment in subtype_result.qc_message
# Example #10
# 0
def test_mixed_subtype_positive_negative_kmers_same_target():
    """Subtype ``fail-qc-mixed-subtype-pos-neg-kmers.fasta`` with Heidelberg.

    The result must report the scheme name, a ``QC.FAIL`` status, and a QC
    message containing the expected mixed-subtype text. Both the actual and
    the expected message are printed before the final comparison.
    """
    scheme_name = 'heidelberg'
    input_fasta = 'tests/data/fail-qc-mixed-subtype-pos-neg-kmers.fasta'

    subtype_result, result_table = subtype_contigs(
        fasta_path=input_fasta,
        genome_name=genome_name,
        scheme=scheme_name,
    )

    assert isinstance(subtype_result, Subtype)
    assert isinstance(result_table, pd.DataFrame)
    assert subtype_result.scheme == scheme_name
    assert subtype_result.qc_status == QC.FAIL

    expected_message = (
        'FAIL: Mixed subtype; the positive and negative kmers were found for the same '
        'target sites 202001, 600783, 1049933, 1193219, 2778621, 2904061, '
        '3278067, 3867228, 4499501, 4579224, 4738855, 202001, '
        '600783, 1049933, 1193219, 2778621, 2904061, 3278067, '
        '3867228, 4499501, 4579224, 4738855 for subtype "1.1".')

    # Print actual then expected so both appear in pytest's captured output.
    actual_message = subtype_result.qc_message
    print(actual_message)
    print(expected_message)
    assert expected_message in subtype_result.qc_message