Ejemplo n.º 1
0
    def it_drop_all_darks():
        """
        Run sim_v1 on a three-peptide fixture (".", "DD", "EE") using
        no_error_model and check that the test dyemat has zero rows while
        the train dyemat keeps exactly one row and every train recall is 0.
        """
        with tmp.tmp_folder(chdir=True):
            prep_result = PrepResult.prep_result_fixture(
                pros=[".", "DD", "EE"],
                pro_is_decoys=[False, False, False],
                peps=[".", "DD", "EE"],
                pep_pro_iz=[0, 1, 2],
            )
            sim_params = _stub_sim_params(no_error_model, n_samples)
            sim_result = sim_v1_worker.sim_v1(sim_params, prep_result)

            # Test set: no rows are expected at all.
            assert sim_result.test_dyemat.shape == (0, n_channels, n_cycles)
            assert sim_result.test_dyemat.dtype == np.uint8
            # All dark. NOTE: with zero rows this check is vacuously true;
            # it is kept so it becomes meaningful if the shape ever changes.
            assert np.all(sim_result.test_dyemat[:] == 0)

            # Train set: a single row remains, with zero recall.
            assert sim_result.train_dyemat.shape == (1, n_channels, n_cycles)
            assert sim_result.train_dyemat.dtype == np.uint8
            assert np.all(sim_result.train_pep_recalls[:] == 0.0)
Ejemplo n.º 2
0
 def it_allows_env_set_for_the_yaml_file():
     """
     Write a logging YAML config into a temp folder, hand its path to
     _run(), and check that the first captured stdout line parses as JSON
     and carries the expected debug message.
     """
     yaml_text = "\n".join(
         (
             "version: 1",
             "formatters:",
             "  json:",
             "    class: plaster.tools.zlog.zlog.TypeAwareJsonFormatter",
             "    format: '%(asctime)s %(levelname)s %(message)s %(filename)s %(lineno)d'",
             "handlers:",
             "  console:",
             "    class: logging.StreamHandler",
             "    stream: ext://sys.stdout",
             "    formatter: json",
             "    level: DEBUG",
             "loggers:",
             "  plaster:",
             "    level: DEBUG",
             "    handlers: [console]",
             "    propagate: 0",
         )
     )
     with tmp_folder(chdir=True, remove=False) as t:
         config_path = t / "logger.yaml"
         with open(config_path, "w") as f:
             f.write(yaml_text)

         _run("normal_traces", plaster_zlog_config_path=config_path)

         # Only the first stdout line is inspected.
         first_record = json.loads(so.split("\n")[0])
         assert "wrote to module logger.debug" in first_record["message"]
Ejemplo n.º 3
0
    def it_generates_flu_info():
        """
        Simulate a four-peptide fixture, call _generate_flu_info() on the
        result, and run nested checks on the flu table and the peps__flus
        accessors.
        """
        with tmp.tmp_folder(chdir=True):
            prep_result = PrepResult.prep_result_fixture(
                pros=[".", "XAXCD", "XAXCDXX", "XCCXX"],
                pro_is_decoys=[False, False, False, False],
                peps=[".", "XAXCD", "XAXCDXX", "XCCXX"],
                pep_pro_iz=[0, 1, 2, 3],
            )
            sim_result = sim_v1_worker.sim_v1(
                _stub_sim_params(some_error_model, n_samples), prep_result
            )
            sim_result._generate_flu_info(prep_result)

            def it_computes_head_and_tail():
                flu_table = sim_result._flus

                # Peptides 1 and 2 must agree on every one of these columns.
                shared = flu_table[flu_table.pep_i.isin([1, 2])]
                expected_by_column = (
                    ("flu_count", 2),
                    ("n_head_ch_0", 1),
                    ("n_head_ch_1", 0),
                    ("n_tail_ch_0", 0),
                    ("n_tail_ch_1", 1),
                )
                for column, expected in expected_by_column:
                    assert np.all(getattr(shared, column) == expected)

                # Peptide 3 has a flu of its own.
                only_third = flu_table[flu_table.pep_i == 3]
                assert np.all(only_third.flu_count == 1)

            def it_peps__flus():
                joined = sim_result.peps__flus(prep_result)
                assert "flustr" in joined
                assert len(joined) == 4

            def it_peps__flus__unique_flus():
                uniques = sim_result.peps__flus__unique_flus(prep_result)
                assert np.all(uniques.pep_i.values == [0, 3])

            zest()
Ejemplo n.º 4
0
 def it_resizes():
     """
     Fill a 10x5 uint8 ArrayResult with a running count, reshape it to 4x5,
     and check that the first 20 values survive in order.
     """
     with tmp.tmp_folder(chdir=True):
         n_cols = 5
         before_rows, after_rows = 10, 4

         ar = ArrayResult(
             "test1", shape=(before_rows, n_cols), dtype=np.uint8, mode="w+"
         )
         ar[:] = (
             np.arange(before_rows * n_cols)
             .astype(np.uint8)
             .reshape((before_rows, n_cols))
         )

         ar.reshape((after_rows, n_cols))
         assert ar.shape == (after_rows, n_cols)

         expected = (
             np.arange(after_rows * n_cols)
             .astype(np.uint8)
             .reshape((after_rows, n_cols))
         )
         assert np.all(ar.arr() == expected)
Ejemplo n.º 5
0
    def it_saves_and_loads_array_results():
        """
        Save a ComplexPropertyResult holding an ArrayResult, check the pickle
        stays small while the array file has the full size, then reload the
        result from outside the folder and compare contents.
        """
        with tmp.tmp_folder() as folder:
            with local.cwd(folder):
                shape = (100, 87)
                arr = ArrayResult(
                    "arr.arr", dtype=np.float64, shape=shape, mode="w+"
                )
                r = np.random.uniform(size=shape)
                arr[:] = r

                ComplexPropertyResult(foo=3, arr=arr).save()

                # The important part is that the pickle doesn't include
                # the array!
                pickle_size = local.path(
                    ComplexPropertyResult.filename
                ).stat().st_size
                assert pickle_size < 200

                # 8 bytes for a float64
                n_rows, n_cols = shape
                array_size = local.path("arr.arr").stat().st_size
                assert array_size == n_rows * n_cols * 8

            # We should now be back in a different folder, but
            # load_from_folder() should be able to deal with that.
            assert local.cwd != folder

            reloaded = ComplexPropertyResult.load_from_folder(folder)
            assert reloaded.foo == 3
            assert np.all(reloaded.arr == r)
Ejemplo n.º 6
0
    def it_returns_the_fraction_of_all_dark_samples():
        """
        Simulate peptide 1 ("ABB") with a default two-channel error model
        and check that its recorded recall lies strictly between 0.9 and 1.0.
        """
        with tmp.tmp_folder(chdir=True):
            n_samples = 5000
            error_model = ErrorModel.from_defaults(n_channels=2)
            sim_params = _stub_sim_params(error_model, n_samples)

            prep_result = PrepResult.prep_result_fixture(
                pros=[".", "ABCDEFGHI"],
                pro_is_decoys=[False, False],
                peps=[".", "ABB"],
                pep_pro_iz=[0, 1],
            )
            pep_seq_df = prep_result.pepseqs()

            dyemat, radmat, recall = _make_arrays(
                "test1", n_peps=2, n_samples=n_samples
            )

            # Only peptide 1 is simulated.
            target_pep_df = pep_seq_df[pep_seq_df.pep_i == 1]
            sim_v1_worker._do_pep_sim(
                target_pep_df,
                sim_params,
                n_samples=n_samples,
                output_dyemat=dyemat,
                output_radmat=radmat,
                output_recall=recall,
            )

            pep_recall = recall[1]
            assert np.all((pep_recall > 0.9) & (pep_recall < 1.0))
Ejemplo n.º 7
0
def zest_survey_v2_integration():
    """
    Smoke-run survey_v2 end to end on a random 20-item prep fixture.

    Still to do: decide what the success metric for the survey actually
    is, and add much simpler cases with clearly separated peptides so
    that sensible results can be asserted.
    """

    with tmp.tmp_folder(chdir=True):
        prep_result = prep_fixtures.result_random_fixture(20)

        sim_v2_result = SimV2Result.from_prep_fixture(
            prep_result, labels="DE,C,Y"
        )
        sim_v2_result.save()

        survey_v2_result = survey_v2_worker.survey_v2(
            SurveyV2Params(), prep_result, sim_v2_result
        )
        # Debug aid:
        # survey_v2_result._survey.to_csv("/erisyon/internal/test.csv")

        # The RNG will need to be seeded before results can be asserted.
        # There is also an odd effect, see
        # https://docs.google.com/spreadsheets/d/1SrOjdNTpw7uLWU1iS7PFm4kbfNLTnW6Am2t85b-GKww/edit#gid=1462476311
        # Why are 3 peptides with the same flu not all showing each other
        # as the nn?

    zest()
Ejemplo n.º 8
0
    def it_saves_and_loads_a_property_list():
        """
        Save a SimplePropertyResult into the current temp folder, check its
        file exists, then reload it and check the property value.
        """
        with tmp.tmp_folder(chdir=True):
            SimplePropertyResult(foo=2).save()

            saved_path = local.path(SimplePropertyResult.filename)
            assert saved_path.exists()

            reloaded = SimplePropertyResult.load_from_folder(".")
            assert reloaded.foo == 2
Ejemplo n.º 9
0
 def it_returns_an_open_array_without_overwrite():
     """
     Check that arr() returns the same object before and after a write,
     and that the flushed backing file has one byte per uint8 element.
     """
     with tmp.tmp_folder(chdir=True):
         n_rows, n_cols = 10, 5
         ar = ArrayResult(
             "test1", shape=(n_rows, n_cols), dtype=np.uint8, mode="w+"
         )

         first_handle = ar.arr()
         ar[:] = (
             np.arange(n_rows * n_cols).astype(np.uint8).reshape((n_rows, n_cols))
         )
         second_handle = ar.arr()
         assert second_handle is first_handle

         ar.flush()
         on_disk_bytes = local.path("test1").stat().st_size
         assert on_disk_bytes == n_rows * n_cols
Ejemplo n.º 10
0
 def it_maintains_decoys_for_train():
     """
     Check that the train dyemat has one entry per peptide, sample,
     channel and cycle.
     """
     with tmp.tmp_folder(chdir=True):
         sim_result = sim_worker.sim(
             _stub_sim_params(some_error_model, n_samples), prep_result
         )
         expected_shape = (n_peptides, n_samples, n_channels, n_cycles)
         assert sim_result.train_dyemat.shape == expected_shape
Ejemplo n.º 11
0
    def it_gets_same_result_as_single_threaded():
        """
        Run step 1 (neighbor-lookup creation) through both the single-process
        and the multi-process code paths on the same random dyemat and
        require every comparable output to match.

        The flann indexes returned by both paths are not compared.
        """
        # Alternative (larger) problem size: (50, 1000, 2, 15)
        n_peps, n_samples, n_channels, n_cycles = (20, 100, 2, 15)

        # Random 0/1 vectors, cumulatively summed along the cycle axis and
        # then reversed so that counts never increase from cycle to cycle.
        bin_vecs = np.random.randint(
            0, 2, size=(n_peps, n_samples, n_channels, n_cycles)
        )
        dyemat = np.cumsum(bin_vecs, axis=3)[:, :, :, ::-1]

        # Force one all-zero sample, then duplicate rows along axis 0 and
        # shuffle them so identical tracks occur under many indices.
        dyemat[0, 0] = np.zeros((n_channels, n_cycles), dtype=DyeType)
        dyemat = np.repeat(dyemat, 80, 0)
        np.random.shuffle(dyemat)

        with tmp.tmp_folder(chdir=True):
            output_dt_mat_st = ArrayResult(
                "dt_mat_st",
                shape=(n_peps * n_samples, n_channels, n_cycles),
                dtype=DyeType,
                mode="w+",
            ).arr()

            output_dt_mat_mt = ArrayResult(
                "dt_mat_mt",
                shape=(n_peps * n_samples, n_channels, n_cycles),
                dtype=DyeType,
                mode="w+",
            ).arr()

            (
                dyetracks_df_st,
                dt_pep_sources_df_st,
                dye_to_best_pep_df_st,
                flann_st,
                n_dts_st,
            ) = nn._step_1_create_neighbors_lookup_singleprocess(dyemat, output_dt_mat_st)

            (
                dyetracks_df_mt,
                dt_pep_sources_df_mt,
                dye_to_best_pep_df_mt,
                flann_mt,
                n_dts_mt,
            ) = nn._step_1_create_neighbors_lookup_multiprocess(dyemat, output_dt_mat_mt)

            # Each single-process output is compared with its multi-process
            # counterpart. Two of these checks previously compared the
            # single-process frame with itself and so could never fail.
            assert_frame_equal(dyetracks_df_st, dyetracks_df_mt)
            assert_frame_equal(dt_pep_sources_df_st, dt_pep_sources_df_mt)
            assert_frame_equal(dye_to_best_pep_df_st, dye_to_best_pep_df_mt)
            assert n_dts_st == n_dts_mt
            assert np.all(output_dt_mat_st == output_dt_mat_mt)
Ejemplo n.º 12
0
 def it_removes_decoys_for_test():
     """
     Check the test dyemat shape, that the rows at index 0 and 4 are all
     zero, and that the test radmat is float32.
     """
     with tmp.tmp_folder(chdir=True):
         sim_result = sim_worker.sim(
             _stub_sim_params(some_error_model, n_samples), prep_result
         )

         expected_shape = (n_peptides, n_samples, n_channels, n_cycles)
         assert sim_result.test_dyemat.shape == expected_shape

         # Index 0 is the nul peptide and index 4 is the decoy; both
         # should be all zero.
         for pep_i in (0, 4):
             assert np.all(sim_result.test_dyemat[pep_i] == 0)

         assert sim_result.test_radmat.dtype == np.float32
Ejemplo n.º 13
0
 def it_surveys():
     """
     Run sim_v1 with is_survey set and no test samples; check the train
     dyemat shape and dtype, and that no test dyemat is produced.
     """
     with tmp.tmp_folder(chdir=True):
         n_survey_samples = 1

         sim_params = _stub_sim_params(some_error_model, n_survey_samples)
         sim_params.is_survey = True
         sim_params.n_samples_train = n_survey_samples
         sim_params.n_samples_test = None

         sim_result = sim_v1_worker.sim_v1(sim_params, prep_result)

         expected_train_shape = (
             n_peptides * n_survey_samples,
             n_channels,
             n_cycles,
         )
         assert sim_result.train_dyemat.shape == expected_train_shape
         assert sim_result.train_dyemat.dtype == np.uint8
         assert sim_result.test_dyemat is None
Ejemplo n.º 14
0
 def _run(func_name, plaster_zlog_config_path=None):
     with tmp_folder(chdir=True):
         nonlocal rc, so, se, lc, jl
         here_folder = local.path(__file__).dirname
         if plaster_zlog_config_path is None:
             plaster_zlog_config_path = here_folder / "../example_zlog.yaml"
         with local.env(PLASTER_ZLOG_CONFIG_PATH=plaster_zlog_config_path):
             main = here_folder / "./example_main.py"
             rc, so, se = local.python.run((
                 "-u",
                 main,
                 func_name,
             ),
                                           retcode=None)
             log_file = local.path("plaster_example.log")
             lc, jl = None, None
             if log_file.exists():
                 lc = log_file.read()
                 jl = [json.loads(i) for i in lc.split("\n") if i != ""]
Ejemplo n.º 15
0
def synth_to_ims_import_result(synth: Synth):
    """
    Render the synth's channel/cycle images and store them in a new
    ImsImportResult saved in a fresh temp folder.

    The temp folder is needed because tests can run multi-threaded and
    must not collide. It is deliberately not removed: the files are opened
    again later, outside this scope, so cleanup is left to whatever
    garbage-collects tmp outside of the test system.

    Returns the saved ImsImportResult.
    """
    chcy_ims = synth.render_chcy()

    with tmp_folder(remove=False) as folder:
        ims_import_result = ImsImportResult(
            folder=folder,
            params=ImsImportParams(),
            tsv_data=None,
            n_fields=synth.n_fields,
            n_channels=synth.n_channels,
            n_cycles=synth.n_cycles,
            dim=synth.dim[0],
            dtype=np.dtype(OUTPUT_NP_TYPE).name,
            src_dir="",
        )

        field_shape = (
            synth.n_channels,
            synth.n_cycles,
            synth.dim[0],
            synth.dim[1],
        )
        for fl_i in range(synth.n_fields):
            field_chcy_arr = ims_import_result.allocate_field(
                fl_i, field_shape, OUTPUT_NP_TYPE
            )
            # NOTE(review): the same rendered stack is written to every
            # field -- confirm this is intended when n_fields > 1.
            field_chcy_arr.arr()[:, :, :, :] = chcy_ims
            ims_import_result.save_field(fl_i, field_chcy_arr, None, None)

        ims_import_result.save()

    return ims_import_result
Ejemplo n.º 16
0
    def it_returns_no_all_dark_samples_on_valid_peps():
        """
        Simulate peptide 1 ("AAA") and check that none of its samples in the
        output dyemat is zero across every channel and cycle.
        """
        with tmp.tmp_folder(chdir=True):
            prep_result = PrepResult.prep_result_fixture(
                pros=[".", "ABCDEFGHI"],
                pro_is_decoys=[False, False],
                peps=[".", "AAA"],
                pep_pro_iz=[0, 1],
            )
            pep_seq_df = prep_result.pepseqs()

            n_samples = 1000
            dyemat, radmat, recall = _make_arrays(
                "test1", n_peps=2, n_samples=n_samples
            )

            # Only peptide 1 is simulated.
            target_pep_df = pep_seq_df[pep_seq_df.pep_i == 1]
            sim_v1_worker._do_pep_sim(
                target_pep_df,
                sim_params,
                n_samples=n_samples,
                output_dyemat=dyemat,
                output_radmat=radmat,
                output_recall=recall,
            )

            # One flag per sample: True when every channel/cycle is zero.
            is_dark_sample = np.all(dyemat[1] == 0, axis=(1, 2))
            assert not np.any(is_dark_sample)
Ejemplo n.º 17
0
 def _make_dyemat():
     """
     Build a small hand-written dyemat (3 peptides x 7 samples x 2 channels
     x 5 cycles) plus an empty output array for dyetracks.

     Returns (dyemat, output_dt_mat).
     """
     n_peps, n_samples, n_channels, n_cycles = 3, 7, 2, 5

     with tmp.tmp_folder(chdir=True):
         dyemat = ArrayResult(
             "dyemat",
             shape=(n_peps, n_samples, n_channels, n_cycles),
             dtype=DyeType,
             mode="w+",
         ).arr()

         # Peptide 1: five samples of one track, two of another.
         dyemat[1, 0:5] = np.array([[2, 2, 1, 1, 0], [2, 1, 0, 0, 0]])
         dyemat[1, 5:7] = np.array([[1, 1, 1, 1, 0], [1, 1, 0, 0, 0]])

         # Peptide 2: one sample equal to dyemat[1][0:5] ...
         dyemat[2, 0:1] = np.array([[2, 2, 1, 1, 0], [2, 1, 0, 0, 0]])
         # ... and six samples of a track unique to this peptide.
         dyemat[2, 1:7] = np.array([[3, 3, 2, 2, 0], [2, 1, 0, 0, 0]])

         # output_dt_mat is big enough to hold every possible dyetrack but
         # would be truncated after this call.
         output_dt_mat = ArrayResult(
             "dt_mat",
             shape=(n_peps * n_samples, n_channels, n_cycles),
             dtype=DyeType,
             mode="w+",
         ).arr()

         return dyemat, output_dt_mat
Ejemplo n.º 18
0
    def it_gives_up_on_hard_peptides_and_returns_none():
        """
        Simulate peptide 1 ("DDD") and check that every entry of the recall
        output is 0.0 afterwards.
        """
        with tmp.tmp_folder(chdir=True):
            prep_result = PrepResult.prep_result_fixture(
                pros=[".", "ABCDEFGHI"],
                pro_is_decoys=[False, False],
                peps=[".", "DDD"],
                pep_pro_iz=[0, 1],
            )
            pep_seq_df = prep_result.pepseqs()

            n_samples = 1000
            dyemat, radmat, recall = _make_arrays(
                "test1", n_peps=2, n_samples=n_samples
            )

            # Only peptide 1 is simulated.
            target_pep_df = pep_seq_df[pep_seq_df.pep_i == 1]
            sim_v1_worker._do_pep_sim(
                target_pep_df,
                sim_params,
                n_samples=n_samples,
                output_dyemat=dyemat,
                output_radmat=radmat,
                output_recall=recall,
            )

            all_recalls = recall[:]
            assert np.all(all_recalls == 0.0)
Ejemplo n.º 19
0
    def it_sets_all_output_arrays():
        """
        Build a tiny hand-written training dyemat (3 peptides x 2 samples),
        classify three hand-written radmat rows with nn.nn(), and check each
        output array on the result.
        """
        n_peps, n_samples, n_channels, n_cycles = (3, 2, 2, 3)
        channel_gains = (100.0, 400.0)

        nn_params = TestNNParams()
        sim_params = SimParams.construct_from_aa_list(
            ["A", "B"], error_model=ErrorModel.no_errors(n_channels)
        )
        for ch_i, gain in enumerate(channel_gains):
            sim_params.error_model.dyes[ch_i].gain = gain
        sim_params._build_join_dfs()

        with tmp.tmp_folder(chdir=True):
            train_dyemat = ArrayResult(
                "train_dyemat",
                shape=(n_peps, n_samples, n_channels, n_cycles),
                dtype=DyeType,
                mode="w+",
            )
            train_dyemat[:] = np.array(
                [
                    [  # Pep 0
                        [[0, 0, 0], [0, 0, 0]],  # Sample 0
                        [[0, 0, 0], [0, 0, 0]],  # Sample 1
                    ],
                    [  # Pep 1
                        [[2, 2, 1], [1, 0, 0]],  # Sample 0
                        [[2, 2, 1], [1, 0, 0]],  # Sample 1
                    ],
                    [  # Pep 2
                        [[2, 2, 2], [2, 1, 0]],  # Sample 0
                        # Sample 1: same as samples 0 & 1 of pep 1
                        [[2, 2, 1], [1, 0, 0]],
                    ],
                ]
            )

            # None of the following are used by nn; they are one-element
            # placeholders.
            placeholder_specs = (
                ("train_radmat", RadType),
                ("train_recalls", RecallType),
                ("train_flus", DyeType),
                ("train_flu_remainders", DyeType),
            )
            placeholders = {
                name: ArrayResult(name, shape=(1,), dtype=dtype, mode="w+").arr()
                for name, dtype in placeholder_specs
            }
            sim_result = SimResult(
                params=sim_params,
                train_dyemat=train_dyemat.arr(),
                **placeholders,
            )

            test_radmat = ArrayResult(
                "test_radmat", shape=(3, n_channels, n_cycles), dtype=RadType, mode="w+"
            )
            test_radmat[:] = np.array(
                [
                    # pep 1, sample 0 & 1; pep 2, sample 1.
                    # Should pred to dt 1, could be pep 1 or pep 2 but
                    # pep 1 has more instances.
                    [[2.1, 1.9, 1.1], [0.9, 0.1, 0.1]],
                    # pep 0, sample 0.
                    # Should pred to dt 0, must be pep 0.
                    [[0.1, 0.1, 0.1], [0.1, 0.1, 0.1]],
                    # Pep 2, sample 0.
                    # Should pred to dt 2, must be pep 2.
                    [[2.1, 1.9, 1.9], [2.1, 1.1, 0.1]],
                ]
            )
            # Scale each channel by its dye gain.
            for ch_i in range(n_channels):
                test_radmat[:, ch_i, :] *= sim_params.error_model.dyes[ch_i].gain

            nn_result = nn.nn(nn_params, sim_result, test_radmat.arr())

            expected_dt_mat = [
                [[0, 0, 0], [0, 0, 0]],
                [[2, 2, 1], [1, 0, 0]],
                [[2, 2, 2], [2, 1, 0]],
            ]
            assert np.all(nn_result.dt_mat.arr() == expected_dt_mat)

            dyetracks = nn_result.dyetracks_df
            assert np.all(dyetracks.dye_i.values == [0, 1, 2])
            assert np.all(dyetracks.weight.values == [2, 3, 1])

            sources = nn_result.dt_pep_sources_df
            assert np.all(sources.dye_i.values == [0, 1, 1, 2])
            assert np.all(sources.pep_i.values == [0, 1, 2, 2])
            assert np.all(sources.n_rows.values == [2, 2, 1, 1])

            assert np.all(nn_result.pred_dt_iz.arr() == [1, 0, 2])

            # TODO: Check all the nn_results here
            # Then I need to implement the avoidance of the max calc
            # And then I can profile it on large datasets
            assert np.all(nn_result.pred_pep_iz.arr() == [1, 0, 2])

            # Both score arrays must lie in [0, 1] and have one entry per
            # test row.
            for score_name in ("scores", "dt_scores"):
                score_result = getattr(nn_result, score_name)
                values = score_result.arr()
                assert np.all((values >= 0) & (values <= 1.0))
                assert score_result.shape == (3,)
Ejemplo n.º 20
0
    def _before():
        nonlocal nn_params, radmat, dt_mat, dt_inv_var_mat, dt_weights, flann
        nonlocal channel_i_to_gain_inv, dye_to_best_pep_df, dt_scores, scores
        nonlocal pred_pep_iz, pred_dt_iz, true_dt_iz, true_dyemat

        nn_params = TestNNParams()

        dt_mat = np.array(
            [
                [[0, 0, 0], [0, 0, 0]],  # Target 0
                [[2, 1, 0], [2, 2, 0]],  # Target 1
                [[1, 1, 0], [1, 0, 0]],  # Target 2
            ],
            dtype=DyeType,
        )

        dt_weights = np.array([0, 5, 10], dtype=DyeWeightType)

        true_dyemat = np.array(
            [
                [[1, 1, 0], [1, 0, 0]],  # Target == 2
                [[2, 1, 0], [2, 2, 0]],  # Target == 1
                [[10, 10, 9], [10, 10, 10]],  # Target == None
            ],
            dtype=DyeType,
        )
        radmat = np.array(
            [
                [[1.1, 0.9, 0.0], [1.1, 0.1, 0.0]],  # Target == 2
                [[2.1, 1.1, 0.0], [2.1, 1.9, 0.0]],  # Target == 1
                [[10.0, 10.0, 9.0], [10.0, 10.0, 10.0]],  # Target == None
            ],
            dtype=RadType,
        )

        channel_i_to_vpd = np.array([1.5, 2.0], dtype=RadType)

        channel_i_to_gain = np.array([10.0, 100.0], dtype=RadType)
        radmat = radmat * channel_i_to_gain[None, :, None]
        channel_i_to_gain_inv = 1.0 / channel_i_to_gain

        dt_inv_var_mat = nn._step_2_create_inverse_variances(
            dt_mat, np.array(channel_i_to_vpd)
        )

        flann = nn._create_flann(dt_mat)

        dye_to_best_pep_df = pd.DataFrame(
            dict(dye_i=[0, 1, 2], pep_i=[0, 2, 1], score=[1.0, 0.5, 1.0],)
        )

        n_rows = radmat.shape[0]
        with tmp.tmp_folder(chdir=True):
            dt_scores = ArrayResult(
                "dt_scores", nn.ScoreType, (n_rows,), mode="w+"
            ).arr()
            scores = ArrayResult("scores", nn.ScoreType, (n_rows,), mode="w+").arr()
            pred_pep_iz = ArrayResult(
                "pred_pep_iz", IndexType, (n_rows,), mode="w+"
            ).arr()
            pred_dt_iz = ArrayResult(
                "pred_dt_iz", IndexType, (n_rows,), mode="w+"
            ).arr()
            true_dt_iz = ArrayResult(
                "true_dt_iz", IndexType, (n_rows,), mode="w+"
            ).arr()
Ejemplo n.º 21
0
 def it_removes_decoys_for_test():
     """
     Check that peptide index 4 never appears among the true peptide
     indices of the test set.
     """
     with tmp.tmp_folder(chdir=True):
         sim_result = sim_v1_worker.sim_v1(
             _stub_sim_params(some_error_model, n_samples), prep_result
         )
         is_decoy_row = sim_result.test_true_pep_iz == 4
         assert not np.any(is_decoy_row)
Ejemplo n.º 22
0
 def it_raises_if_train_and_test_identical():
     """
     Check that running sim_v1 with no_error_model raises an error whose
     message contains "are identical".
     """
     with tmp.tmp_folder(chdir=True):
         with zest.raises(in_message="are identical"):
             # Parameter construction stays inside the raises block, as
             # before, so an error from either call is captured.
             sim_v1_worker.sim_v1(
                 _stub_sim_params(no_error_model, n_samples), prep_result
             )