Example #1
def radiometry_cy_ims(cy_ims, locs, reg_psf_samples, peak_mea):
    """
    Compute radiometry on the stack of cycle images for one field on one channel

    Returns:
        output_radmat: ndarray(n_peaks, n_cycles, (sig, noi, bg_med, bg_std))
    """
    with context(cy_ims=cy_ims,
                 locs=locs,
                 reg_psf_samples=reg_psf_samples,
                 peak_mea=peak_mea) as ctx:
        check.array_t(locs, ndim=2, dtype=np.float64)
        n_peaks = locs.shape[0]
        if n_peaks > 0:
            batches = zap.make_batch_slices(n_rows=locs.shape[0],
                                            _batch_size=100)
            with zap.Context(trap_exceptions=False, mode="thread"):
                zap.work_orders([
                    dict(
                        fn=_do_radiometry_field_stack_peak_batch,
                        ctx=ctx,
                        peak_start_i=batch[0],
                        peak_stop_i=batch[1],
                    ) for batch in batches
                ])

    return ctx._out_radiometry
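
The pattern above indexes each batch only as batch[0] (start row) and batch[1] (stop row). A minimal stand-in sketch of an equivalent slicer, assuming that is all zap.make_batch_slices returns; the real helper may differ:

# Hypothetical stand-in for zap.make_batch_slices, inferred solely from the
# batch[0]/batch[1] indexing in the example above.
def make_batch_slices(n_rows, _batch_size=100):
    return [
        (start, min(start + _batch_size, n_rows))
        for start in range(0, n_rows, _batch_size)
    ]

assert make_batch_slices(250, _batch_size=100) == [(0, 100), (100, 200), (200, 250)]
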
Example #2
def _run(radmat, radmat_filter_mask, dyemat, dyepeps, n_channels):
    with c_nn_v2.context(
            train_dyemat=dyemat,
            train_dyepeps=dyepeps,
            radmat=radmat,
            radmat_filter_mask=radmat_filter_mask,
            priors=params.priors,
            n_channels=n_channels,
            n_neighbors=params.n_neighbors,
            run_row_k_fit=params.run_row_k_fit,
            run_against_all_dyetracks=params.run_against_all_dyetracks,
            scoring_verbose=params.scoring_verbose,
            scoring_verbose_cc=params.scoring_verbose_cc,
            row_k_score_factor=params.row_k_score_factor,
    ) as nn_v2_context:
        # _nn_v2.c chokes if a batch is larger than 1024*16
        batches = zap.make_batch_slices(n_rows=radmat.shape[0],
                                        _batch_size=_batch_size)
        with zap.Context(mode="thread",
                         trap_exceptions=False,
                         progress=progress):
            # This must be thread mode because it operates on the context in shared memory.
            zap.work_orders([
                dict(
                    fn=c_nn_v2.do_classify_radrows,
                    radrow_start_i=batch[0],
                    n_radrows=batch[1] - batch[0],
                    nn_v2_context=nn_v2_context,
                ) for batch in batches
            ])
        return nn_v2_context
Example #3
def it_bubbles_exceptions():
    with zest.mock(zap._show_work_order_exception) as m_ex:
        with zest.raises(ValueError):
            work_orders[0].fn = test2
            zap.work_orders(
                work_orders,
                _process_mode=True,
                _trap_exceptions=False,
            )
    assert m_ex.called_once()
Example #4
        def it_calls_progress():
            progress = MockFunction()

            work_orders[0].fn = test2
            zap.work_orders(
                work_orders,
                _debug_mode=True,
                _progress=progress,
            )

            assert progress.calls == [
                ((1, 2, False), {}),
                ((2, 2, False), {}),
            ]
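
The asserted call log above implies the progress callback receives (completed_count, total_count, retry_flag). A minimal callback compatible with that assumed signature (Example #11 below wires the same signature into tqdm):

# Hypothetical progress callback matching the (i, n, retry) call pattern the
# test above asserts on; ignores retries and prints a simple percentage.
def print_progress(i, n, retry):
    if not retry:
        print(f"{100.0 * i / n:.0f}% ({i}/{n})")
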
Example #5
    def classify(self, test_X, keep_all_class_scores, progress=None):

        # TASK: There's some work to be done here to optimize the size
        #  of this split to dial the memory usage

        n_rows = test_X.shape[0]

        if n_rows < 100:
            pred_y, scores, all_class_scores = _do_predict(
                classifier=self.classifier, X=test_X)
        else:
            n_work_orders = n_rows // 100

            results = zap.work_orders(
                [
                    Munch(classifier=self.classifier, X=X, fn=_do_predict)
                    for X in np.array_split(test_X, n_work_orders, axis=0)
                ],
                _trap_exceptions=False,
                _progress=progress,
            )
            pred_y = utils.listi(results, 0)
            scores = utils.listi(results, 1)
            all_class_scores = utils.listi(results, 2)
            pred_y = np.concatenate(pred_y)
            scores = np.concatenate(scores)
            if keep_all_class_scores:
                all_class_scores = np.concatenate(all_class_scores)

        if not keep_all_class_scores:
            all_class_scores = None

        return pred_y, scores, all_class_scores
Example #6
    def classify(self, X, progress=None):
        check.array_t(X, ndim=2)

        n_rows = X.shape[0]

        if n_rows < 100:
            winner_y, winner_scores, runnerup_y, runnerup_scores = _do_predict(
                classifier=self.classifier, X=X)
        else:
            n_work_orders = n_rows // 100

            with zap.Context(progress=progress, trap_exceptions=False):
                results = zap.work_orders([
                    Munch(classifier=self.classifier, X=X, fn=_do_predict)
                    for X in np.array_split(X, n_work_orders, axis=0)
                ])
            winner_y = utils.listi(results, 0)
            winner_scores = utils.listi(results, 1)
            runnerup_y = utils.listi(results, 2)
            runnerup_scores = utils.listi(results, 3)

            winner_y = np.concatenate(winner_y)
            winner_scores = np.concatenate(winner_scores)
            runnerup_y = np.concatenate(runnerup_y)
            runnerup_scores = np.concatenate(runnerup_scores)

        return winner_y, winner_scores, runnerup_y, runnerup_scores
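
Both classify examples gather worker results with utils.listi and then np.concatenate. A small pure-numpy sketch of that gather step, assuming utils.listi(results, i) is equivalent to taking element i of every tuple:

import numpy as np

# Hypothetical per-chunk results: one (winner_y, winner_scores) tuple per work order.
results = [
    (np.array([0, 1]), np.array([0.9, 0.8])),
    (np.array([2]), np.array([0.7])),
]

# Assumed equivalent of utils.listi(results, i), followed by the concatenate step.
winner_y = np.concatenate([r[0] for r in results])       # -> array([0, 1, 2])
winner_scores = np.concatenate([r[1] for r in results])  # -> array([0.9, 0.8, 0.7])
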
Example #7
def it_traps_exceptions():
    work_orders[0].fn = test2
    results = zap.work_orders(
        work_orders,
        _process_mode=True,
        _trap_exceptions=True,
    )
    assert isinstance(results[0], ValueError)
    assert results[1] == 3 + 4 + 5
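
Together with it_bubbles_exceptions above, this test pins down the trapping contract: with _trap_exceptions=True a failing work order yields its exception instance in the results list, and with _trap_exceptions=False the exception propagates. A minimal sketch of separating such a mixed result list, assuming only that contract:

# Hypothetical post-processing of a trapped-exception run.
def split_results(results):
    ok = [r for r in results if not isinstance(r, Exception)]
    failed = [r for r in results if isinstance(r, Exception)]
    return ok, failed

ok, failed = split_results([ValueError("boom"), 12])
assert ok == [12] and len(failed) == 1
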
Example #8
        def it_retries():
            progress = MockFunction()
            with zest.mock(zap._mock_BrokenProcessPool_exception) as m:
                m.exceptions(BrokenProcessPool)

                results = zap.work_orders(work_orders,
                                          _process_mode=True,
                                          _progress=progress)
                assert progress.calls == [
                    ((1, 2, True), {}),
                    ((2, 2, True), {}),
                ]
Example #9
def sigproc(sigproc_params, ims_import_result, progress=None):
    # CACHE n_channels, n_cycles, dim into sigproc_params by loading one field
    ims = ims_import_result.field_chcy_ims(field_i=0)
    n_inchannels, n_cycles, h, w = ims.shape
    assert h == w
    n_outchannels = sigproc_params.n_output_channels
    sigproc_params._outchannels_inchannels_cycles_dim = (
        n_outchannels,
        n_inchannels,
        n_cycles,
        h,
    )

    if not sigproc_params.channel_indices_for_alignment:
        sigproc_params.channel_indices_for_alignment = list(range(n_inchannels))

    sigproc_result = SigprocV1Result(
        params=sigproc_params,
        n_input_channels=n_inchannels,
        n_channels=n_outchannels,
        n_cycles=n_cycles,
    )

    n_fields = ims_import_result.n_fields
    n_fields_limit = sigproc_params.n_fields_limit
    if n_fields_limit is not None and n_fields_limit < n_fields:
        n_fields = n_fields_limit

    # TASK: I think this would be nicer with the parallel array map
    results = zap.work_orders(
        [
            Munch(
                fn=_do_field,
                field_i=field_i,
                sigproc_params=sigproc_params,
                ims_import_result=ims_import_result,
                sigproc_result=sigproc_result,
            )
            for field_i in range(n_fields)
        ],
        _process_mode=True,
        _trap_exceptions=False,
        _progress=progress,
    )

    # SET the result n_channels (possibly different from input n_channels)
    n_inchannels = np.array(results)
    assert np.all(n_inchannels == n_inchannels[0])
    sigproc_result.n_channels = int(n_inchannels[0])

    return sigproc_result
Example #10
def sigproc(sigproc_params, ims_import_result, progress=None):
    """
    Analyze all fields
    """
    calib = Calibration(sigproc_params.calibration)
    assert not calib.is_empty()

    channel_weights = _compute_channel_weights(sigproc_params)

    sigproc_result = SigprocV2Result(
        params=sigproc_params,
        n_input_channels=ims_import_result.n_channels,
        n_channels=sigproc_params.n_output_channels,
        n_cycles=ims_import_result.n_cycles,
        channel_weights=channel_weights,
    )

    n_fields = ims_import_result.n_fields
    n_fields_limit = sigproc_params.n_fields_limit
    if n_fields_limit is not None and n_fields_limit < n_fields:
        n_fields = n_fields_limit

    zap.work_orders(
        [
            Munch(
                fn=_do_sigproc_field,
                ims_import_result=ims_import_result,
                sigproc_params=sigproc_params,
                field_i=field_i,
                sigproc_result=sigproc_result,
            ) for field_i in range(n_fields)
        ],
        _trap_exceptions=False,
        _progress=progress,
    )

    return sigproc_result
Example #11
    def all_dfs(self, fn, parallel=False):
        """
        Run fn on every run, assert that each returns a DataFrame,
        and then pd.concat all the results into one, adding run_i
        and run_name columns to that DataFrame.

        Example:
            df = job.all_dfs(lambda run: run.prep.pros())
        """
        df_list = []
        if parallel:

            def wrap_fn(run, run_i):
                res_df = fn(run)
                assert isinstance(res_df, pd.DataFrame)
                res_df["run_i"] = run_i
                res_df["run_name"] = run.manifest.run_name
                return res_df

            work_orders = [
                {"fn": wrap_fn, "args": [run, run_i]}
                for run_i, run in enumerate(self._run_results.values())
            ]

            # TODO: it would be nice to integrate this progress stuff into zap as an optional argument
            progress = tqdm(total=len(work_orders))

            def progress_callback(i, j, retry):
                if not retry:
                    progress.update()

            with zap.Context(trap_exceptions=False, progress=progress_callback):
                df_list = zap.work_orders(work_orders)

            progress.close()
        else:
            for run_i, run in enumerate(self._run_results.values()):
                res_df = fn(run)
                assert isinstance(res_df, pd.DataFrame)
                res_df["run_i"] = run_i
                res_df["run_name"] = run.manifest.run_name
                df_list += [res_df]
        return pd.concat(df_list).reset_index(drop=True)
Example #12
def pmap_runstore(fn, work_orders, _clear_cache=False, **kws):
    """
    Parallel run fn over the work_orders.
    Arguments:
        work_orders: a list of dicts.
            Each work_order dict MUST contain a 'run', 'key', and 'args' parameters that are used
            to update the appropriate run's store with that key.
    """
    work_orders = [
        dict(
            **wo,
            fn=_do_store_get_cache_or_execute,
            inner_fn=fn,
            _clear_cache=_clear_cache,
        ) for wo in work_orders
    ]

    p = zap.work_orders(work_orders, **kws)
    # UPDATE stores. This is done in the master process to avoid sync issues
    for wo, result in zip(work_orders, p.results):
        from_cache, result = result
        if not from_cache:
            wo["run"].store[wo["key"]] = result
Example #13
def it_runs_serially():
    results = zap.work_orders(work_orders, _process_mode=True)
    assert results[0] == 1 + 2 + 3
    assert results[1] == 3 + 4 + 5
Example #14
    def pr_curve_by_pep(self,
                        return_auc=False,
                        pep_iz=None,
                        force_compute=False,
                        progress=None):
        """
        Obtain pr_curves for every peptide.

        If all params are default, this may return cached information
        computed during the run.

        Returns:
            A (potentially HUGE) df of every P/R for every peptide
            A smaller df with just the pep_i and the Area-Under-Curve

        This uses the work_order system (as opposed to the
        higher-level array_split_map()) because _do_pep_pr_curve
        returns three same-length arrays AND one scalar; array_split_map()
        doesn't like that.
        """

        # The PR for all peptides is computed during the run (no auc).
        if not return_auc and not force_compute and self._cached_pr is not None:
            df = self._cached_pr
            if pep_iz is not None:
                df = df[df.pep_i.isin(pep_iz)]
            return df.copy()

        if pep_iz is None:
            pep_iz = self._prep_result.peps().pep_i.values
        if isinstance(pep_iz, np.ndarray):
            pep_iz = pep_iz.tolist()
        check.list_t(pep_iz, int)

        with zap.Context(mode="thread",
                         trap_exceptions=False,
                         progress=progress):
            results = zap.work_orders([
                Munch(
                    fn=_do_pep_pr_curve,
                    pep_i=pep_i,
                    bag=self,
                ) for pep_i in pep_iz
            ], )

        df_per_pep = [
            pd.DataFrame(
                dict(
                    pep_i=np.repeat(np.array([pep_i]), prec.shape[0]),
                    prec=prec,
                    recall=recall,
                    score=score,
                )) for pep_i, (prec, recall, score, _) in results
        ]

        if len(df_per_pep) > 0:
            pr_df = pd.concat(df_per_pep, axis=0)
        else:
            pr_df = None

        auc_df = pd.DataFrame(
            [(pep_i, auc) for pep_i, (_, _, _, auc) in results],
            columns=["pep_i", "auc"],
        )

        if return_auc:
            return pr_df, auc_df
        else:
            return pr_df
Example #15
def ims_import(src_dir: Path,
               ims_import_params: ImsImportParams,
               progress=None,
               pipeline=None):
    reference_nd2_file_for_metadata = None

    scan_result = _scan_files(src_dir)
    if len(scan_result.nd2_paths) > 0:
        reference_nd2_file_for_metadata = scan_result.nd2_paths[0]

    target_mea = max(scan_result.dim[0], scan_result.dim[1])

    if not utils.is_power_of_2(target_mea):
        new_dim = utils.next_power_of_2(target_mea)
        _convert_message(target_mea, new_dim)
        target_mea = new_dim

    def clamp_fields(n_fields_true: int) -> Tuple[int, int]:
        n_fields = n_fields_true
        n_fields_limit = ims_import_params.get("n_fields_limit")
        if n_fields_limit is not None:
            n_fields = n_fields_limit

        start_field = ims_import_params.get("start_field", 0)
        if start_field + n_fields > n_fields_true:
            n_fields = n_fields_true - start_field

        return start_field, n_fields
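    # Worked illustration (hypothetical numbers, not from the source): with
    # n_fields_true=10, n_fields_limit=4 and start_field=8, the limit first
    # clamps n_fields to 4, then the start_field check trims it to 2 so that
    # start_field + n_fields never exceeds n_fields_true; returns (8, 2).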

    def clamp_cycles(n_cycles_true: int) -> Tuple[int, int]:
        n_cycles = n_cycles_true
        n_cycles_limit = ims_import_params.get("n_cycles_limit")
        if n_cycles_limit is not None:
            n_cycles = n_cycles_limit

        start_cycle = ims_import_params.get("start_cycle", 0)
        if start_cycle is None:
            start_cycle = 0
        if start_cycle + n_cycles > n_cycles_true:
            n_cycles = n_cycles_true - start_cycle

        return start_cycle, n_cycles

    tsv_data = tsv.load_tsv_for_folder(src_dir)

    # ALLOCATE the ImsImportResult
    ims_import_result = ImsImportResult(params=ims_import_params,
                                        tsv_data=Munch(tsv_data))

    dst_ch_i_to_src_ch_i = ims_import_params.dst_ch_i_to_src_ch_i
    if dst_ch_i_to_src_ch_i is None:
        dst_ch_i_to_src_ch_i = [i for i in range(scan_result.n_channels)]

    n_out_channels = len(dst_ch_i_to_src_ch_i)

    # Sanity check that we didn't end up with any src_channels outside of the channel range
    assert all([
        0 <= src_ch_i < scan_result.n_channels
        for src_ch_i in dst_ch_i_to_src_ch_i
    ])

    if ims_import_params.is_z_stack_single_file:
        field_iz, n_cycles_found = _z_stack_import(
            scan_result.nd2_paths[0],
            target_mea,
            ims_import_result,
            dst_ch_i_to_src_ch_i,
            ims_import_params.z_stack_n_slices_per_field,
        )
        n_cycles = ims_import_params.z_stack_n_slices_per_field

    elif ims_import_params.is_movie:
        if scan_result.mode == ScanFileMode.nd2:
            # "Movie mode" means that there aren't any chemical cycles, but rather we are using "cycles" to represent different images in a zstack
            start_field, n_fields = clamp_fields(len(scan_result.nd2_paths))

            # In movie mode, the n_fields from the .nd2 file becomes n_cycles
            scan_result.n_cycles = scan_result.n_fields
            start_cycle, n_cycles = clamp_cycles(scan_result.n_cycles)

            with zap.Context(progress=progress):
                field_iz, n_cycles_found = zap.arrays(
                    _do_movie_import_nd2,
                    dict(
                        input_field_i=list(
                            range(start_field, start_field + n_fields)),
                        output_field_i=list(range(n_fields)),
                    ),
                    _stack=True,
                    scan_result=scan_result,
                    start_cycle=start_cycle,
                    n_cycles=n_cycles,
                    target_mea=target_mea,
                    import_result=ims_import_result,
                    dst_ch_i_to_src_ch_i=dst_ch_i_to_src_ch_i,
                )
        elif scan_result.mode == ScanFileMode.npy:
            start_field, n_fields = clamp_fields(scan_result.n_fields)
            start_cycle, n_cycles = clamp_cycles(scan_result.n_cycles)

            with zap.Context(progress=progress):
                field_iz, n_cycles_found = zap.arrays(
                    _do_movie_import_npy,
                    dict(
                        input_field_i=list(
                            range(start_field, start_field + n_fields)),
                        output_field_i=list(range(n_fields)),
                    ),
                    _stack=True,
                    scan_result=scan_result,
                    start_cycle=start_cycle,
                    n_cycles=n_cycles,
                    target_mea=target_mea,
                    import_result=ims_import_result,
                    dst_ch_i_to_src_ch_i=dst_ch_i_to_src_ch_i,
                )
        else:
            raise NotImplementedError()

    else:
        start_field, n_fields = clamp_fields(scan_result.n_fields)

        if pipeline:
            pipeline.set_phase(0, 2)

        if scan_result.mode == ScanFileMode.nd2:
            scan_result.n_cycles = len(scan_result.nd2_paths)

            # SCATTER
            with zap.Context(mode="thread", progress=progress):
                zap.arrays(
                    _do_nd2_scatter,
                    dict(
                        cycle_i=list(range(len(scan_result.nd2_paths))),
                        src_path=scan_result.nd2_paths,
                    ),
                    _stack=True,
                    start_field=start_field,
                    n_fields=n_fields,
                    n_channels=scan_result.n_channels,
                    target_mea=target_mea,
                )

        elif scan_result.mode == ScanFileMode.tif:
            # SCATTER
            work_orders = [
                Munch(field_i=k[0], channel_i=k[1], cycle_i=k[2], path=path)
                for k, path in
                scan_result.tif_paths_by_field_channel_cycle.items()
            ]
            with zap.Context(trap_exceptions=False):
                results = zap.work_orders(_do_tif_scatter, work_orders)

            # CHECK that every file exists
            for f in range(n_fields):
                for ch in range(scan_result.n_channels):
                    for cy in range(scan_result.n_cycles):
                        expected = f"__{f:03d}-{ch:02d}-{cy:02d}.npy"
                        if expected not in results:
                            raise FileNotFoundError(
                                f"File is missing in tif pattern: {expected}")

        elif scan_result.mode == ScanFileMode.npy:
            # In npy mode there's no scatter as the files are already fully scattered
            pass

        else:
            raise ValueError(f"Unknown im import mode {scan_result.mode}")

        if pipeline:
            pipeline.set_phase(1, 2)

        # GATHER
        start_cycle, n_cycles = clamp_cycles(scan_result.n_cycles)

        with zap.Context(progress=progress):
            field_iz = zap.arrays(
                _do_gather,
                dict(
                    input_field_i=list(
                        range(start_field, start_field + n_fields)),
                    output_field_i=list(range(0, n_fields)),
                ),
                _stack=True,
                start_cycle=start_cycle,
                n_cycles=n_cycles,
                dim=target_mea,
                import_result=ims_import_result,
                mode=scan_result.mode,
                npy_paths_by_field_channel_cycle=scan_result.
                npy_paths_by_field_channel_cycle,
                dst_ch_i_to_src_ch_i=dst_ch_i_to_src_ch_i,
            )

    if reference_nd2_file_for_metadata:
        with _nd2(reference_nd2_file_for_metadata) as nd2:
            if hasattr(nd2, "metadata"):
                full = Munch(
                    metadata=nd2.metadata,
                    metadata_seq=nd2.metadata_seq,
                )
                ims_import_result._nd2_metadata_full = full

                def me(block_name, default=None):
                    return utils.block_search(full.metadata.SLxExperiment,
                                              block_name, default)

                def mp(block_name, default=None):
                    return utils.block_search(
                        full.metadata_seq.SLxPictureMetadata, block_name,
                        default)

                n_channels = mp("sPicturePlanes.uiSampleCount", 1)

                ims_import_result._nd2_metadata = Munch(
                    calibrated_pixel_size=mp("dCalibration"),
                    experiment_type="movie" if me("eType") == 1 else "edman",
                    n_cycles=me("uLoopPars.uiCount"),
                    cmd_before=me("wsCommandBeforeCapture"),
                    cmd_after=me("wsCommandAfterCapture"),
                    n_channels=n_channels,
                )

                per_channel = []
                for ch_i in range(n_channels):
                    laser_wavelength = None
                    laser_power = None
                    n_lasers = mp(
                        f"sPicturePlanes.sSampleSetting.a{ch_i}.pDeviceSetting.m_uiMultiLaserLines0",
                        0,
                    )
                    for i in range(n_lasers):
                        is_used = mp(
                            f"sPicturePlanes.sSampleSetting.a{ch_i}.pDeviceSetting.m_bMultiLaserLineUsed0-{i:02d}",
                            0,
                        )
                        if is_used == 1:
                            laser_wavelength = mp(
                                f"sPicturePlanes.sSampleSetting.a{ch_i}.pDeviceSetting.m_uiMultiLaserLineWavelength0-{i:02d}",
                                0,
                            )
                            laser_power = mp(
                                f"sPicturePlanes.sSampleSetting.a{ch_i}.pDeviceSetting.m_dMultiLaserLinePower0-{i:02d}",
                                0,
                            )

                    ch_munch = Munch(
                        laser_wavelength=laser_wavelength,
                        laser_power=laser_power,
                        camera_name=mp(
                            f"sPicturePlanes.sSampleSetting.a{ch_i}.pCameraSetting.CameraUniqueName"
                        ),
                        sensor_pixels_x=mp(
                            f"sPicturePlanes.sSampleSetting.a{ch_i}.pCameraSetting.FormatQuality.fmtDesc.sizeSensorPixels.cx"
                        ),
                        sensor_pixels_y=mp(
                            f"sPicturePlanes.sSampleSetting.a{ch_i}.pCameraSetting.FormatQuality.fmtDesc.sizeSensorPixels.cy"
                        ),
                        sensor_microns_x=mp(
                            f"sPicturePlanes.sSampleSetting.a{ch_i}.pCameraSetting.FormatQuality.fmtDesc.sizeSensorMicrons.cx"
                        ),
                        sensor_microns_y=mp(
                            f"sPicturePlanes.sSampleSetting.a{ch_i}.pCameraSetting.FormatQuality.fmtDesc.sizeSensorMicrons.cy"
                        ),
                        bin_x=mp(
                            f"sPicturePlanes.sSampleSetting.a{ch_i}.pCameraSetting.FormatQuality.fmtDesc.dBinningX"
                        ),
                        bin_y=mp(
                            f"sPicturePlanes.sSampleSetting.a{ch_i}.pCameraSetting.FormatQuality.fmtDesc.dBinningY"
                        ),
                        format=mp(
                            f"sPicturePlanes.sSampleSetting.a{ch_i}.pCameraSetting.FormatQuality.fmtDesc.wszFormatDesc"
                        ),
                        roi_l=mp(
                            f"sPicturePlanes.sSampleSetting.a{ch_i}.pCameraSetting.FormatQuality.rectSensorUser.left"
                        ),
                        roi_r=mp(
                            f"sPicturePlanes.sSampleSetting.a{ch_i}.pCameraSetting.FormatQuality.rectSensorUser.right"
                        ),
                        roi_t=mp(
                            f"sPicturePlanes.sSampleSetting.a{ch_i}.pCameraSetting.FormatQuality.rectSensorUser.top"
                        ),
                        roi_b=mp(
                            f"sPicturePlanes.sSampleSetting.a{ch_i}.pCameraSetting.FormatQuality.rectSensorUser.bottom"
                        ),
                        averaging=mp(
                            f"sPicturePlanes.sSampleSetting.a{ch_i}.pCameraSetting.PropertiesQuality.Average"
                        ),
                        integration=mp(
                            f"sPicturePlanes.sSampleSetting.a{ch_i}.pCameraSetting.PropertiesQuality.Integrate"
                        ),
                        name=mp(
                            f"sPicturePlanes.sSampleSetting.a{ch_i}.pCameraSetting.Metadata.Channels.Channel_0.Name"
                        ),
                        dichroic_filter=mp(
                            f"sPicturePlanes.sSampleSetting.a{ch_i}.pDeviceSetting.m_sFilterName0"
                        ),
                        emission_filter=mp(
                            f"sPicturePlanes.sSampleSetting.a{ch_i}.pDeviceSetting.m_sFilterName1"
                        ),
                        optivar=mp(
                            f"sPicturePlanes.sSampleSetting.a{ch_i}.pDeviceSetting.m_dZoomPosition"
                        ),
                        tirf_focus=mp(
                            f"sPicturePlanes.sSampleSetting.a{ch_i}.pDeviceSetting.m_dTIRFPositionFocus"
                        ),
                        tirf_align_x=mp(
                            f"sPicturePlanes.sSampleSetting.a{ch_i}.pDeviceSetting.m_dTIRFPositionX"
                        ),
                        tirf_align_y=mp(
                            f"sPicturePlanes.sSampleSetting.a{ch_i}.pDeviceSetting.m_dTIRFPositionY"
                        ),
                        objective_mag=mp(
                            f"sPicturePlanes.sSampleSetting.a{ch_i}.pObjectiveSetting.dObjectiveMag"
                        ),
                        objective_na=mp(
                            f"sPicturePlanes.sSampleSetting.a{ch_i}.pObjectiveSetting.dObjectiveNA"
                        ),
                        objective_refractive_index=mp(
                            f"sPicturePlanes.sSampleSetting.a{ch_i}.pObjectiveSetting.dRefractIndex"
                        ),
                        settings_name=mp(
                            f"sPicturePlanes.sSampleSetting.a{ch_i}.sOpticalConfigs.\x02.sOpticalConfigName"
                        ),
                        readout_mode=mp(
                            f"sPicturePlanes.sSampleSetting.a{ch_i}.sSpecSettings.Readout Mode"
                        ),
                        readout_rate=mp(
                            f"sPicturePlanes.sSampleSetting.a{ch_i}.sSpecSettings.Readout Rate"
                        ),
                        noise_filter=mp(
                            f"sPicturePlanes.sSampleSetting.a{ch_i}.sSpecSettings.Noise Filter"
                        ),
                        temperature=mp(
                            f"sPicturePlanes.sSampleSetting.a{ch_i}.sSpecSettings.Temperature"
                        ),
                        exposure=mp(
                            f"sPicturePlanes.sSampleSetting.a{ch_i}.dExposureTime"
                        ),
                    )
                    per_channel += [ch_munch]

                ims_import_result._nd2_metadata.update(**Munch(
                    per_channel=per_channel))

                if me("eType") == 1:
                    # Movie mode
                    ims_import_result._nd2_metadata.update(**Munch(
                        movie_start=me("dStart"),
                        movie_period=me("dPeriod"),
                        movie_duration=me("dDuration"),
                        movie_duration_pref=me("bDurationPref"),
                        movie_max_period_diff=me("dMaxPeriodDiff"),
                        movie_min_period_diff=me("dMinPeriodDiff"),
                        movie_avg_period_diff=me("dAvgPeriodDiff"),
                    ))

    ims_import_result.n_fields = len(field_iz)
    ims_import_result.n_channels = n_out_channels
    ims_import_result.n_cycles = n_cycles
    ims_import_result.dim = target_mea
    ims_import_result.dtype = np.dtype(OUTPUT_NP_TYPE).name
    ims_import_result.src_dir = src_dir

    # CLEAN
    for file in local.cwd // "__*":
        file.delete()

    return ims_import_result
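
The dimension handling near the top of this example pads the field measure up to the next power of two. A small stand-in sketch of that check, assuming utils.is_power_of_2 and utils.next_power_of_2 have the obvious semantics their names suggest:

# Hypothetical equivalents of utils.is_power_of_2 / utils.next_power_of_2,
# inferred only from how they are used above.
def is_power_of_2(n):
    return n > 0 and (n & (n - 1)) == 0

def next_power_of_2(n):
    p = 1
    while p < n:
        p *= 2
    return p

assert not is_power_of_2(1024 + 1)
assert next_power_of_2(1024 + 1) == 2048
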
Example #16
def _step_1_create_neighbors_lookup_multiprocess(dyemat, output_dt_mat):
    """
    The dyemat may have many duplicate rows, each from some number of peps.

    These duplicate rows are consolidated so that each coordinate in dyemat space
    is given a unique "dye_i".

    The unique (sorted) dyetracks are written to output_dt_mat which is expected
    to be large enough to hold them.

    In this multiprocess version I use all the cores to break the set
    into separate unique sets and then combine them. This tends to be
    at least twice as fast.

    Returns:
        dyetracks_df: DF(dye_i, weight).
            Where weight is the sum of all rows that pointed to this dyetrack
        dt_pep_sources_df: DF(dye_i, pep_i, n_rows)
            Records how many times each peptide generated dye_i where count > 0.
        flann: A fast Approximate Nearest Neighbors lookup using PYFLANN.
        n_dts: Number of actual unique dts
    """
    check.array_t(dyemat,
                  ndim=4)  # (n_peps, n_samples, n_channels, n_cycles): uint8

    # A multiprocess version of uniqueification.
    # The idea is to divide the list into blocks, unique each block,
    # then unique the much smaller combined set.
    # This is tricky because we have to keep track of the
    # counts and inverse.

    n_peps, n_samples, n_channels, n_cycles = dyemat.shape
    true_pep_iz = np.repeat(np.arange(n_peps), n_samples)
    n_rows = n_peps * n_samples
    n_cols = n_channels * n_cycles
    flat_dyemat = dyemat.reshape((n_rows, n_cols))

    n_batches = _cpu_count()
    batch_size = max(1, (n_rows // n_batches) + 1)
    batch_slices = []
    for batch_i in range(n_batches):
        start = batch_i * batch_size
        stop = min((batch_i + 1) * batch_size, n_rows)
        if stop > start:
            batch_slices += [slice(start, stop)]

    # prof()
    result_batches = zap.work_orders(
        [
            Munch(fn=_do_batch_unique, rng=batch_slice, dyemat=flat_dyemat)
            for batch_slice in batch_slices
        ],
        _process_mode=True,
        _trap_exceptions=False,
    )
    # prof()

    # At this point result_batches has the unique rows from each batch
    # and now we need to merge them.
    # First we concatenate them all into a new array
    cat_dts = np.concatenate([batch[0] for batch in result_batches])
    # prof()

    # Stack all the true_dt_iz (which comes from the inverse of unique)
    # but then each of these has to be incremented to index into the
    # concatenated stack
    i = 0
    cat_true_dt_iz = []
    for batch in result_batches:
        true_dt_iz = batch[1]
        cat_true_dt_iz += [true_dt_iz + i]
        i += batch[0].shape[0]
    cat_true_dt_iz = np.concatenate(cat_true_dt_iz)
    # prof()

    # Stack all the counts
    cat_dt_counts = np.concatenate([batch[2] for batch in result_batches])
    # prof()

    # Unique on the batches
    dt_mat, true_dt_iz, dt_counts = np.unique(cat_dts,
                                              return_inverse=True,
                                              return_counts=True,
                                              axis=0)
    # prof()

    dt_counts = np.array([
        cat_dt_counts[np.argwhere(true_dt_iz == i)].sum()
        for i in range(dt_mat.shape[0])
    ])
    # prof()

    true_dt_iz = true_dt_iz[cat_true_dt_iz]

    # prof()
    n_dts = dt_mat.shape[0]
    output_dt_mat[0:n_dts] = dt_mat.reshape((n_dts, n_channels, n_cycles))

    # prof()

    flann = _create_flann(dt_mat)
    dyetracks_df, dt_pep_sources_df, dye_to_best_pep_df = _setup_pep_source_dfs(
        true_dt_iz, true_pep_iz, dt_counts)
    return dyetracks_df, dt_pep_sources_df, dye_to_best_pep_df, flann, n_dts
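
The batch-unique-then-merge idea described in the docstring can be shown with plain numpy (no zap, no FLANN). A minimal sketch: unique each batch with counts and inverse, concatenate the per-batch uniques, unique that much smaller set, then fold counts and remap the inverse indices through both levels:

import numpy as np

rows = np.array([[0, 1], [0, 1], [2, 2], [0, 1], [2, 2], [3, 0]])

# 1. Unique each batch separately, keeping counts and the inverse mapping.
batch_results = [
    np.unique(batch, return_inverse=True, return_counts=True, axis=0)
    for batch in (rows[0:3], rows[3:6])
]

# 2. Concatenate the per-batch uniques; shift each batch's inverse indices so
#    they point into the concatenated stack.
cat_uniques = np.concatenate([u for u, _, _ in batch_results])
cat_counts = np.concatenate([c for _, _, c in batch_results])
cat_inverse, offset = [], 0
for u, inv, _ in batch_results:
    cat_inverse.append(inv.ravel() + offset)
    offset += u.shape[0]
cat_inverse = np.concatenate(cat_inverse)

# 3. Unique the much smaller concatenated set, fold the counts, and remap every
#    original row through the two levels of inverse indices.
final_uniques, final_inverse = np.unique(cat_uniques, return_inverse=True, axis=0)
final_inverse = final_inverse.ravel()
final_counts = np.array([
    cat_counts[final_inverse == i].sum() for i in range(final_uniques.shape[0])
])
row_to_unique_i = final_inverse[cat_inverse]

assert final_counts.tolist() == [3, 2, 1]
assert row_to_unique_i.tolist() == [0, 0, 1, 0, 1, 2]
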
Example #17
def ims_import(src_dir, ims_import_params, progress=None, pipeline=None):
    (
        mode,
        nd2_paths,
        tif_paths_by_field_channel_cycle,
        npy_paths_by_field_channel_cycle,
        n_fields_true,
        n_channels,
        n_cycles_true,
        dim,
    ) = _scan_files(src_dir)

    target_dim = max(dim[0], dim[1])

    if not utils.is_power_of_2(target_dim):
        new_dim = utils.next_power_of_2(target_dim)
        _convert_message(target_dim, new_dim)
        target_dim = new_dim

    src_channels = list(range(n_channels))

    def clamp_fields(n_fields_true):
        n_fields = n_fields_true
        n_fields_limit = ims_import_params.get("n_fields_limit")
        if n_fields_limit is not None:
            n_fields = n_fields_limit

        start_field = ims_import_params.get("start_field", 0)
        if start_field + n_fields > n_fields_true:
            n_fields = n_fields_true - start_field

        return start_field, n_fields

    def clamp_cycles(n_cycles_true):
        n_cycles = n_cycles_true
        n_cycles_limit = ims_import_params.get("n_cycles_limit")
        if n_cycles_limit is not None:
            n_cycles = n_cycles_limit

        start_cycle = ims_import_params.get("start_cycle", 0)
        if start_cycle + n_cycles > n_cycles_true:
            n_cycles = n_cycles_true - start_cycle

        return start_cycle, n_cycles

    tsv_data = tsv.load_tsv_for_folder(src_dir)
    ims_import_result = ImsImportResult(params=ims_import_params,
                                        tsv_data=Munch(tsv_data))

    if ims_import_params.is_movie:
        start_field, n_fields = clamp_fields(len(nd2_paths))

        # In movie mode, the n_fields from the .nd2 file becomes n_cycles
        n_cycles_true = n_fields_true
        start_cycle, n_cycles = clamp_cycles(n_cycles_true)

        field_iz, n_cycles_found = zap.arrays(
            _do_movie_import,
            dict(
                nd2_path=nd2_paths[start_field:start_field + n_fields],
                output_field_i=list(range(n_fields)),
            ),
            _process_mode=True,
            _progress=progress,
            _stack=True,
            start_cycle=start_cycle,
            n_cycles=n_cycles,
            target_dim=target_dim,
            nd2_import_result=ims_import_result,
        )

    else:
        start_field, n_fields = clamp_fields(n_fields_true)

        if pipeline:
            pipeline.set_phase(0, 2)

        if mode == "nd2":
            n_cycles_true = len(nd2_paths)

            # SCATTER
            zap.arrays(
                _do_nd2_scatter,
                dict(cycle_i=list(range(len(nd2_paths))), src_path=nd2_paths),
                _process_mode=True,
                _progress=progress,
                _stack=True,
                start_field=start_field,
                n_fields=n_fields,
                n_channels=n_channels,
                target_dim=target_dim,
            )

        elif mode == "tif":
            # SCATTER
            work_orders = [
                Munch(field_i=k[0], channel_i=k[1], cycle_i=k[2], path=path)
                for k, path in tif_paths_by_field_channel_cycle.items()
            ]
            results = zap.work_orders(_do_tif_scatter,
                                      work_orders,
                                      _trap_exceptions=False)

            # CHECK that every file exists
            for f in range(n_fields):
                for ch in range(n_channels):
                    for cy in range(n_cycles_true):
                        expected = f"__{f:03d}-{ch:02d}-{cy:02d}.npy"
                        if expected not in results:
                            raise FileNotFoundError(
                                f"File is missing in tif pattern: {expected}")

        elif mode == "npy":
            # In npy mode there's no scatter as the files are already fully scattered
            pass

        else:
            raise ValueError(f"Unknown im import mode {mode}")

        if pipeline:
            pipeline.set_phase(1, 2)

        # GATHER
        start_cycle, n_cycles = clamp_cycles(n_cycles_true)

        field_iz = zap.arrays(
            _do_gather,
            dict(
                input_field_i=list(range(start_field, start_field + n_fields)),
                output_field_i=list(range(0, n_fields)),
            ),
            _process_mode=True,
            _progress=progress,
            _stack=True,
            src_channels=src_channels,
            start_cycle=start_cycle,
            n_cycles=n_cycles,
            dim=target_dim,
            nd2_import_result=ims_import_result,
            mode=mode,
            npy_paths_by_field_channel_cycle=npy_paths_by_field_channel_cycle,
        )

    ims_import_result.n_fields = len(field_iz)
    ims_import_result.n_channels = n_channels
    ims_import_result.n_cycles = n_cycles
    ims_import_result.dim = target_dim

    # CLEAN
    for file in local.cwd // "__*":
        file.delete()

    return ims_import_result