Code example #1
File: zest_survey_v2.py  Project: erisyon/plaster
def zest_survey_v2_pyx():
    # TODO: This

    prep_result = prep_fixtures.result_simple_fixture(True)

    sim_v2_result = SimV2Result.from_prep_fixture(prep_result, labels="A,B")

    # Expected per-peptide dye patterns (two channels x five cycles):
    # pep 0:
    # pep 1: 10000 11000
    # pep 2: 00000 21100
    # pep 3: 00000 21000

    pep_i_to_mic_pep_i, pep_i_to_isolation_metric = survey_v2_fast.survey(
        prep_result.n_peps,
        sim_v2_result.train_dyemat,
        sim_v2_result.train_dyepeps,
        n_threads=1,
        progress=None,
    )

    assert pep_i_to_mic_pep_i.tolist() == [0, 3, 3, 2]

    # In the current version they are all close.
    # Ideally the first peptide should be a long way away and the other two should collide.
    assert pep_i_to_isolation_metric[1] < 2
    assert pep_i_to_isolation_metric[2] < 2
    assert pep_i_to_isolation_metric[3] < 2

    # TODO: Do a better job here: compare some contrived peptides
    # and make sure that the outliers really are outliers.
    # Also need to do the sampling to figure out the magic number for the "nothing close" case.

    # TODO: Test for unlabelled peptides. I'm sure it is broken

    zest()
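
The assertion pep_i_to_mic_pep_i.tolist() == [0, 3, 3, 2] follows from the dye
patterns in the comment: pep 3 is the closest peptide to both pep 1 and pep 2,
and pep 2 is the closest to pep 3. The sketch below is not part of the test; it
uses a plain Euclidean distance between the commented dye patterns as an assumed
stand-in for the survey's actual isolation metric, just to reproduce that
nearest-neighbor structure.

# Sketch only (not from the project): verify the nearest-neighbor structure
# implied by the commented dye patterns above.
import numpy as np

dyetracks = {
    1: np.array([1, 0, 0, 0, 0, 1, 1, 0, 0, 0]),  # pep 1: 10000 11000
    2: np.array([0, 0, 0, 0, 0, 2, 1, 1, 0, 0]),  # pep 2: 00000 21100
    3: np.array([0, 0, 0, 0, 0, 2, 1, 0, 0, 0]),  # pep 3: 00000 21000
}

nearest = {}
for i, row_i in dyetracks.items():
    # Distance from pep i to every other pep; the argmin is its nearest neighbor
    dists = {
        j: np.linalg.norm(row_i - row_j)
        for j, row_j in dyetracks.items()
        if j != i
    }
    nearest[i] = min(dists, key=dists.get)

# pep 1 is closest to pep 3 (sqrt(2), vs sqrt(3) to pep 2); peps 2 and 3 are
# each other's nearest neighbor (distance 1). Index 0 is the nul peptide.
assert nearest == {1: 3, 2: 3, 3: 2}
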
Code example #2
File: zest_sim_v2_worker.py  Project: erisyon/plaster
    def decoys():
        prep_with_decoy = prep_fixtures.result_simple_fixture(has_decoy=True)
        sim_v2_result, sim_v2_params = _sim(priors=dict(), _prep_result=prep_with_decoy)

        # def it_maintains_decoys_for_train():
        #     assert sim_v2_result.train_dyemat.shape == (4, 10)
        #
        # def it_removes_decoys_for_test():
        #     # 1000 because the nul-dye track should be removed
        #     assert sim_v2_result.test_radmat.shape == (1000, 2, 5)

        zest()
Code example #3
File: zest_call_bag.py  Project: erisyon/plaster
def zest_call_bag_fdr():
    # first entry is the null peptide
    stub_sim_result = Munch(train_pep_recalls=np.array([-1.0, 0.1, 0.2, 0.3]))
    stub_prep_result = prep_fixtures.result_simple_fixture(has_decoy=True)

    true_pep_iz = np.array([1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3])
    pred_pep_iz = np.array([1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 1])
    scores = np.array(
        [0.8, 0.9, 0.7, 0.6, 0.85, 0.53, 0.54, 0.55, 0.75, 0.4, 0.3, 0.35])

    cb = CallBag(
        sim_result=stub_sim_result,
        prep_result=stub_prep_result,
        true_pep_iz=true_pep_iz,
        pred_pep_iz=pred_pep_iz,
        scores=scores,
    )

    fdr_df = cb.fdr_from_decoys_df()
Code example #4
File: zest_sim_v2_worker.py  Project: erisyon/plaster
def zest_sim_v2_worker():
    prep_result = prep_fixtures.result_simple_fixture()

    def _sim(priors=None, _prep_result=None, sim_kwargs=None):
        if _prep_result is None:
            _prep_result = prep_result

        priors = PriorsMLEFixtures.fixture_no_errors(**(priors or {}))

        if sim_kwargs is None:
            sim_kwargs = {}

        sim_kwargs["use_lognormal_model"] = True

        sim_v2_params = SimV2Params.from_aa_list_fixture(
            ["A", "B"], priors=priors, n_edmans=4, **sim_kwargs
        )

        return sim_v2_worker.sim_v2(sim_v2_params, _prep_result), sim_v2_params

    @zest.retry(2)
    def it_returns_train_dyemat():
        # Because there are no errors, only the perfect dyemats are produced
        sim_v2_result, sim_v2_params = _sim()
        assert sim_v2_result.train_dyemat.shape == (4, 5 * 2)  # 5 cycles, 2 channels
        assert utils.np_array_same(
            sim_v2_result.train_dyemat,
            np.array(
                [
                    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                    [0, 0, 0, 0, 0, 2, 1, 0, 0, 0],
                    [0, 0, 0, 0, 0, 2, 1, 1, 0, 0],
                    [1, 0, 0, 0, 0, 1, 1, 0, 0, 0],
                ],
                dtype=np.uint8,
            ),
        )

    def it_returns_train_dyemat_with_a_zero_row():
        sim_v2_result, sim_v2_params = _sim()
        assert np.all(sim_v2_result.train_dyemat[0, :] == 0)

    @zest.retry(2)
    def it_returns_train_dyemat_for_cleaved_cterm_labels():
        prep_cterm = prep_fixtures.result_cterm_label_fixture()

        # dyemat when allow_edman_cterm is True
        sim_v2_result, sim_v2_params = _sim(
            _prep_result=prep_cterm, sim_kwargs=Munch(allow_edman_cterm=True)
        )
        assert sim_v2_result.train_dyemat.shape == (6, 5 * 2)  # 5 cycles, 2 channels
        assert utils.np_array_same(
            sim_v2_result.train_dyemat,
            np.array(
                [
                    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                    [0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
                    [0, 0, 0, 0, 0, 1, 1, 0, 0, 0],
                    [0, 0, 0, 0, 0, 1, 1, 1, 0, 0],
                    [0, 0, 0, 0, 0, 1, 1, 1, 1, 0],
                    [0, 0, 0, 0, 0, 1, 1, 1, 1, 1],
                ],
                dtype=np.uint8,
            ),
        )

    @zest.retry(2)
    def it_returns_train_dyemat_for_uncleaved_cterm_labels():
        prep_cterm = prep_fixtures.result_cterm_label_fixture()

        # dyemat when allow_edman_cterm is False (default)
        sim_v2_result, sim_v2_params = _sim(
            _prep_result=prep_cterm, sim_kwargs=Munch(allow_edman_cterm=False)
        )
        assert sim_v2_result.train_dyemat.shape == (3, 5 * 2)  # 5 cycles, 2 channels
        assert utils.np_array_same(
            sim_v2_result.train_dyemat,
            np.array(
                [
                    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                    [0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
                    [0, 0, 0, 0, 0, 1, 1, 1, 1, 1],
                ],
                dtype=np.uint8,
            ),
        )

    def it_returns_train_dyepeps():
        sim_v2_result, sim_v2_params = _sim()

        # The order of dyts is not guaranteed, so that column is dropped before comparing
        assert utils.np_array_same(
            sim_v2_result.train_dyepeps[:, (0, 2)],
            np.array([[0, 0], [1, 5000], [2, 5000], [3, 5000],], dtype=np.uint64),
        )

    def it_handles_non_fluorescent():
        sim_v2_result, sim_v2_params = _sim(priors=dict(p_non_fluorescent=0.5))
        # Every dyepep other than the nul-row should have
        # n_reads (col 2) well below 5000.
        assert np.all(sim_v2_result.train_dyepeps[1:, 2] < 2000)

    def it_returns_no_all_dark_samples():
        sim_v2_result, sim_v2_params = _sim(priors=dict(p_non_fluorescent=0.99))
        assert not np.any(sim_v2_result.train_dyepeps[1:, 0] == 0)

    def it_returns_recalls():
        sim_v2_result, sim_v2_params = _sim(priors=dict(p_non_fluorescent=0.50))
        assert sim_v2_result.train_pep_recalls.shape[0] == 4  # 4 peps
        assert (
            sim_v2_result.train_pep_recalls[0] == 0.0
        )  # The nul record should have no recall
        assert np.all(
            sim_v2_result.train_pep_recalls[1:] < 0.85
        )  # The exact number is hard to say, but it should be < 1

    def it_emergency_escapes():
        sim_v2_result, sim_v2_params = _sim(priors=dict(p_non_fluorescent=0.99))
        # When nothing is fluorescent, everything should have zero recall
        assert np.all(sim_v2_result.train_pep_recalls == 0.0)

    def it_handles_empty_dyepeps():
        sim_v2_result, sim_v2_params = _sim(priors=dict(p_non_fluorescent=1.0))
        assert np.all(sim_v2_result.train_pep_recalls == 0.0)

    def decoys():
        prep_with_decoy = prep_fixtures.result_simple_fixture(has_decoy=True)
        sim_v2_result, sim_v2_params = _sim(priors=dict(), _prep_result=prep_with_decoy)

        # def it_maintains_decoys_for_train():
        #     assert sim_v2_result.train_dyemat.shape == (4, 10)
        #
        # def it_removes_decoys_for_test():
        #     # 1000 because the nul-dye track should be removed
        #     assert sim_v2_result.test_radmat.shape == (1000, 2, 5)

        zest()

    def it_skips_row_noise():
        sim_v2_result, sim_v2_params = _sim(priors=dict(row_k_sigma=0.0))
        spy(sim_v2_result.test_true_row_ks)
        assert np.all(sim_v2_result.test_true_row_ks == 1.0)

    def it_adds_row_noise():
        sim_v2_result, sim_v2_params = _sim(priors=dict(row_k_sigma=0.5))
        assert np.any(sim_v2_result.test_true_row_ks != 1.0)

    @zest.skip(reason="Not implemented")
    def it_raises_if_train_and_test_identical():
        raise NotImplementedError

    zest()
Code example #5
File: zest_call_bag.py  Project: erisyon/plaster
def zest_pr_pro_curve_edge_cases():
    """
    Testing situations in which calls are all wrong or all right,
    for pr_curve_pro (protein-based rather than peptide-based)
    """
    # first entry is null peptide
    stub_sim_result = Munch(train_pep_recalls=np.array([-1.0, 1.0, 1.0, 1.0]))
    stub_prep_result = prep_fixtures.result_simple_fixture()

    # CallBag: all right / all wrong
    true_pep_iz = [1] * 10
    pred_pep_iz_1 = [1] * 10
    pred_pep_iz_2 = [2] * 10
    scores = [0.5] * 10
    cb_all_right = CallBag(
        sim_result=stub_sim_result,
        prep_result=stub_prep_result,
        true_pep_iz=true_pep_iz,
        pred_pep_iz=pred_pep_iz_1,
        scores=scores,
    )
    cb_all_wrong = CallBag(
        sim_result=stub_sim_result,
        prep_result=stub_prep_result,
        true_pep_iz=true_pep_iz,
        pred_pep_iz=pred_pep_iz_2,
        scores=scores,
    )

    zero_pr_result = [[0.0, 0.0], [0.0, 0.0], [0.5, 0.0], [0.0, 0.0]]
    one_pr_result = [[1.0, 1.0], [1.0, 1.0], [0.5, 0.0], [1.0, 1.0]]

    def it_computes_zero_pr():
        p, r, s, a = cb_all_wrong.pr_curve_pro(n_steps=2)
        assert utils.np_array_same([p, r, s, a], zero_pr_result)

    def it_computes_zero_pr_for_subset():
        p, r, s, a = cb_all_wrong.pr_curve_pro(pro_iz_subset=[1], n_steps=2)
        assert utils.np_array_same([p, r, s, a], zero_pr_result)

        p, r, s, a = cb_all_wrong.pr_curve_pro(pro_iz_subset=[2], n_steps=2)
        assert utils.np_array_same([p, r, s, a], zero_pr_result)

        # protein 2 does not show up in true/pred at all, so it should get a zero PR curve
        p, r, s, a = cb_all_right.pr_curve_pro(pro_iz_subset=[2], n_steps=2)
        assert utils.np_array_same([p, r, s, a], zero_pr_result)

    def it_computes_one_pr():
        p, r, s, a = cb_all_right.pr_curve_pro(n_steps=2)
        # Can't use utils.flatten here because the elements are ndarrays, not lists or tuples.
        # Reduce each elementwise comparison with np.all so the assert actually checks values.
        compare = [np.all(x == y) for x, y in zip([p, r, s, a], one_pr_result)]
        assert all(compare)

    def it_computes_one_pr_for_subset():
        p, r, s, a = cb_all_right.pr_curve_pro(pro_iz_subset=[1], n_steps=2)
        compare = [np.all(x == y) for x, y in zip([p, r, s, a], one_pr_result)]
        assert all(compare)

    def it_handles_all_rows_no_recall():
        p, r, s, a = cb_all_wrong.pr_curve_pro()
        assert np.all(r == 0.0)

    zest()
Code example #6
File: zest_call_bag.py  Project: erisyon/plaster
def zest_pr_curve_pro_no_tied_scores():
    """
    Testing situations with some right and some wrong calls, but no tied scores.
    """

    # first entry is null peptide
    stub_sim_result = Munch(train_pep_recalls=np.array([-1.0] + [1.0] * 3))
    stub_prep_result = prep_fixtures.result_simple_fixture()

    # pep 1 is predicted correctly 1/4 of the time, pep 2 1/2, pep 3 3/4.
    # Mispredictions of peps 2 and 3 always go to pep 1, to avoid the
    # unrealistic case of getting the peptide wrong but getting the
    # protein right anyway, which in reality should be rare.
    true_pep_iz = np.array([1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3])
    # true pro iz will be [1,1,1,1,2,2,2,2,2,2,2,2]
    pred_pep_iz = np.array([1, 2, 2, 2, 2, 2, 1, 1, 3, 3, 3, 1])
    # pred pro iz will be [1,2,2,2,2,2,1,1,2,2,2,1]
    scores = np.array(
        [0.8, 0.9, 0.7, 0.6, 0.85, 0.53, 0.54, 0.55, 0.75, 0.4, 0.3, 0.35])

    cb = CallBag(
        sim_result=stub_sim_result,
        prep_result=stub_prep_result,
        true_pep_iz=true_pep_iz,
        pred_pep_iz=pred_pep_iz,
        scores=scores,
    )

    prec = np.array([
        2.0 / 3.0,  # 0.8
        3.0 / 5.0,  # 0.7
        3.0 / 6.0,  # 0.6
        4.0 / 9.0,  # 0.5
        5.0 / 10.0,  # 0.4
        6.0 / 12.0,  # 0.3
        6.0 / 12.0,  # 0.2
        6.0 / 12.0,  # 0.1
        6.0 / 12.0,  # 0.0
    ])
    reca = np.array([
        2.0 / 12.0,  # 0.8
        3.0 / 12.0,  # 0.7
        3.0 / 12.0,  # 0.6
        4.0 / 12.0,  # 0.5
        5.0 / 12.0,  # 0.4
        6.0 / 12.0,  # 0.3
        6.0 / 12.0,  # 0.2
        6.0 / 12.0,  # 0.1
        6.0 / 12.0,  # 0.0
    ])

    def it_computes_combined_pr():
        p, r, s, a = cb.pr_curve_pro(n_steps=10)
        assert np.array_equal(p, prec)
        assert np.allclose(s, np.linspace(0.8, 0.0, 9))
        assert np.array_equal(r, reca)

    def it_computes_subset_pr():
        p, r, s, a = cb.pr_curve_pro(pro_iz_subset=[1], n_steps=10)

        prec = np.array([
            1.0 / 1.0,  # 0.8
            1.0 / 1.0,  # 0.7
            1.0 / 1.0,  # 0.6
            1.0 / 3.0,  # 0.5
            1.0 / 3.0,  # 0.4
            1.0 / 4.0,  # 0.3
            1.0 / 4.0,  # 0.2
            1.0 / 4.0,  # 0.1
            1.0 / 4.0,  # 0.0
        ])
        reca = np.array([
            1.0 / 4.0,  # 0.8
            1.0 / 4.0,  # 0.7
            1.0 / 4.0,  # 0.6
            1.0 / 4.0,  # 0.5
            1.0 / 4.0,  # 0.4
            1.0 / 4.0,  # 0.3
            1.0 / 4.0,  # 0.2
            1.0 / 4.0,  # 0.1
            1.0 / 4.0,  # 0.0
        ])

        assert np.array_equal(p, prec)
        assert np.array_equal(r, reca)
        assert np.allclose(s, np.linspace(0.8, 0.0, 9))

        #
        # Test again for a different subset.
        #
        p, r, s, a = cb.pr_curve_pro(pro_iz_subset=[2], n_steps=10)

        prec = np.array([
            1.0 / 2.0,  # 0.8
            2.0 / 4.0,  # 0.7
            2.0 / 5.0,  # 0.6
            3.0 / 6.0,  # 0.5
            4.0 / 7.0,  # 0.4
            5.0 / 8.0,  # 0.3
            5.0 / 8.0,  # 0.2
            5.0 / 8.0,  # 0.1
            5.0 / 8.0,  # 0.0
        ])
        reca = np.array([
            1.0 / 8.0,  # 0.8
            2.0 / 8.0,  # 0.7
            2.0 / 8.0,  # 0.6
            3.0 / 8.0,  # 0.5
            4.0 / 8.0,  # 0.4
            5.0 / 8.0,  # 0.3
            5.0 / 8.0,  # 0.2
            5.0 / 8.0,  # 0.1
            5.0 / 8.0,  # 0.0
        ])

        assert np.array_equal(p, prec)
        assert np.array_equal(r, reca)
        assert np.allclose(s, np.linspace(0.8, 0.0, 9))

    zest()
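
The expected precision and recall values in this test can be recomputed by hand.
The sketch below is not from the project: it assumes the pep-to-protein mapping
given in the comments (pep 1 -> protein 1, peps 2 and 3 -> protein 2) and that
calls with score >= threshold are kept at each step of the curve.

# Sketch only: recompute the combined protein-level precision/recall that
# it_computes_combined_pr asserts, under the assumptions stated above.
import numpy as np

true_pep_iz = np.array([1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3])
pred_pep_iz = np.array([1, 2, 2, 2, 2, 2, 1, 1, 3, 3, 3, 1])
scores = np.array(
    [0.8, 0.9, 0.7, 0.6, 0.85, 0.53, 0.54, 0.55, 0.75, 0.4, 0.3, 0.35])

pep_to_pro = {1: 1, 2: 2, 3: 2}  # assumed, from the "true/pred pro iz" comments
true_pro = np.array([pep_to_pro[i] for i in true_pep_iz])
pred_pro = np.array([pep_to_pro[i] for i in pred_pep_iz])

# Same thresholds as the asserted score vector np.linspace(0.8, 0.0, 9)
for thresh in [0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1, 0.0]:
    keep = scores >= thresh
    n_correct = int((true_pro[keep] == pred_pro[keep]).sum())
    # precision = correct / calls kept at this threshold
    # recall    = correct / all true rows
    print(f"thresh {thresh:.1f}: prec {n_correct}/{int(keep.sum())}"
          f"  reca {n_correct}/{len(true_pro)}")

# Prints precision 2/3, 3/5, 3/6, 4/9, 5/10, 6/12, 6/12, 6/12, 6/12 and
# recall 2/12, 3/12, 3/12, 4/12, 5/12, 6/12, 6/12, 6/12, 6/12 -- matching
# the prec and reca arrays asserted above.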