def test_init_fdr(self, fetch_formulas_mock):
        """Check that ``init_fdr`` yields one entry with a populated ``td_df``.

        A single molecular database is passed in, so exactly one
        ``(<first element>, fdr)`` pair is expected back, and the FDR
        object's ``td_df`` frame must not be empty.

        ``fetch_formulas_mock`` is not used directly in the body.
        NOTE(review): presumably supplied by a patch decorator or fixture
        outside this view - confirm.
        """
        fdr_settings = {'decoy_sample_size': 20}
        ds_config = {
            'analysis_version': 1,
            'fdr': fdr_settings,
            'isotope_generation': BASIC_ISOTOPE_GENERATION_CONFIG,
        }
        moldbs = [MolecularDB(0, 'test_db', 'version')]

        fdr_pairs = init_fdr(ds_config, moldbs)

        # One database in -> one pair out.
        assert len(fdr_pairs) == 1
        _, fdr_obj = fdr_pairs[0]
        target_decoy_df = fdr_obj.td_df
        assert not target_decoy_df.empty
def test_compute_fdr(spark_context, ds_config):
    """Check the row count and column set of the frame from ``compute_fdr``.

    Three formula metrics rows go in; the result must have three rows and
    exactly the columns ``ion_formula``, ``msm``, ``formula``, ``modifier``
    and ``fdr`` (in any order).
    """
    fdr_pairs = init_fdr(ds_config, [MolecularDB(0, 'test_db', 'version')])
    _, fdr_obj = fdr_pairs[0]

    # The ion formula map is passed to compute_fdr without the 'moldb_id' column.
    ion_formulas_df = collect_ion_formulas(spark_context, fdr_pairs)
    formula_map_df = ion_formulas_df.drop('moldb_id', axis=1)

    metric_rows = [(10, 'H3O', 0.99), (11, 'C5H4O', 0.5), (12, 'H2ONa', 0.1)]
    metrics_input_df = pd.DataFrame(
        metric_rows, columns=['formula_i', 'ion_formula', 'msm']
    )
    formula_metrics_df = metrics_input_df.set_index('formula_i')

    result_df = compute_fdr(fdr_obj, formula_metrics_df, formula_map_df, None)

    expected_columns = ['ion_formula', 'msm', 'formula', 'modifier', 'fdr']
    assert len(result_df) == 3
    # Column order is not part of the contract checked here, hence the sort.
    assert sorted(result_df.columns.tolist()) == sorted(expected_columns)
    def test_decoy_sample_size_30(self, fetch_formulas_mock, spark_context):
        """Check the ion formula frame built with a decoy sample size of 30.

        The collected frame must expose the columns ``moldb_id``,
        ``ion_formula``, ``formula`` and ``modifier`` in that order and
        contain 62 rows.

        ``fetch_formulas_mock`` is not used directly in the body.
        NOTE(review): presumably supplied by a patch decorator or fixture
        outside this view - confirm.
        """
        fdr_settings = {'decoy_sample_size': 30}
        ds_config = {
            'analysis_version': 1,
            'fdr': fdr_settings,
            'isotope_generation': BASIC_ISOTOPE_GENERATION_CONFIG,
        }
        moldbs = [MolecularDB(0, 'test_db', 'version')]
        fdr_pairs = init_fdr(ds_config, moldbs)

        ion_formulas_df = collect_ion_formulas(spark_context, fdr_pairs)

        expected_columns = ['moldb_id', 'ion_formula', 'formula', 'modifier']
        assert ion_formulas_df.columns.tolist() == expected_columns
        # NOTE(review): 62 presumably = 2 formulas * (1 target adduct + 30 decoys);
        # confirm against the mocked formulas and BASIC_ISOTOPE_GENERATION_CONFIG.
        expected_shape = (62, 4)
        assert ion_formulas_df.shape == expected_shape
    def test_neutral_losses_and_chem_mods(
        self, fetch_formulas_mock, spark_context
    ):
        """Check the ion formula frame built with the full isotope config.

        Uses ``FULL_ISOTOPE_GENERATION_CONFIG`` with a decoy sample size of 1
        and verifies the column order and total row count of the collected
        ion formulas.

        ``fetch_formulas_mock`` is not used directly in the body.
        NOTE(review): presumably supplied by a patch decorator or fixture
        outside this view - confirm.
        """
        fdr_settings = {'decoy_sample_size': 1}
        ds_config = {
            'analysis_version': 1,
            'fdr': fdr_settings,
            'isotope_generation': FULL_ISOTOPE_GENERATION_CONFIG,
        }
        moldbs = [MolecularDB(0, 'test_db', 'version')]
        fdr_pairs = init_fdr(ds_config, moldbs)

        ion_formulas_df = collect_ion_formulas(spark_context, fdr_pairs)

        expected_columns = ['moldb_id', 'ion_formula', 'formula', 'modifier']
        assert ion_formulas_df.columns.tolist() == expected_columns
        # Expected row count:
        #   2 formulas
        #   * (4 target adducts + 4 target adducts * 1 decoy adduct per target adduct)
        #   * (no loss + 2 neutral losses)
        #   * (no mod + 1 chem mod)
        #   = 2 * (4 + 4) * 3 * 2 = 96
        expected_shape = (96, 4)
        assert ion_formulas_df.shape == expected_shape