def test_read_imc_mcd(self, raw_path: Path):
    """Parse the XML embedded in the mock MCD file and check the session name.

    Args:
        raw_path: Directory that contains the ``20210305_NE_mockData1``
            data folder.
    """
    dataset = '20210305_NE_mockData1'
    source_file = raw_path / dataset / '20210305_NE_mockData1.mcd'

    # Pull the raw XML out of the binary file first, then parse it separately.
    parser = McdParser(source_file)
    schema_xml = parser.get_mcd_xml()
    parsed = McdXmlParser(schema_xml, str(source_file))

    assert parsed.session.name == "20210305_NE_mockData1"
Example #2
0
def mcd2ome(mcdfile: File,
            export: File,
            min_height: int = 10,
            min_width: int = 10,
            metadata: bool = False,
            slide: bool = False,
            channels: Optional[List[str]] = None,
            verbose: bool = False
            ):
    """Convert `.mcd` to `.ome.tiff` file

    Every valid acquisition of the session is written to `export` as
    ``<session>_slide<slide id>_ROI<acquisition id>.ome.tiff`` (spaces and
    dots in the stem are replaced by underscores).

    Args:
        mcdfile: Path of the `.mcd` file to read.
        export: Directory to write into; it is created via `create_folder`.
        min_height: Acquisitions whose `max_y` is below this value are skipped.
        min_width: Acquisitions whose `max_x` is below this value are skipped.
        metadata: If True, write the MCD XML schema (when available) and the
            session JSON into ``export/metadata``.
        slide: If True, save slide and panorama images into ``export/slide``.
        channels: Forwarded unchanged as `names` to `save_ome_tiff`.
        verbose: If True, print the channels that have image data for each
            exported file.

    Raises:
        NotADirectoryError: If `export` points to an existing file.
    """
    mcdfile = Path(mcdfile)
    export = Path(export)

    mcd_parser = McdParser(mcdfile)
    try:
        session = mcd_parser.session

        if export.is_file():
            raise NotADirectoryError(f"Cannot export to {export}, not a directory.")
        create_folder(export)

        mcd_xml = mcd_parser.get_mcd_xml()
        if metadata:
            meta_folder = create_folder(export / 'metadata')
            # Save XML metadata if available
            if mcd_xml is not None:
                with open(meta_folder / (session.metaname + "_schema.xml"), "wt") as f:
                    f.write(mcd_xml)
            # Save session data in json
            session.save(meta_folder / (session.metaname + "_session.json"))

        if slide:
            slide_folder = create_folder(export / 'slide')
            for key in session.slides.keys():
                mcd_parser.save_slide_image(key, slide_folder)
            for key in session.panoramas.keys():
                mcd_parser.save_panorama_image(key, slide_folder)

        # Save acquisition images in OME-TIFF format
        for acquisition in session.acquisitions.values():
            # Skip acquisitions that are too small in either dimension.
            if acquisition.max_x < min_width or acquisition.max_y < min_height:
                continue

            acquisition_data = mcd_parser.get_acquisition_data(acquisition.id)
            if not acquisition_data.is_valid:
                continue

            # Channels for which image data can actually be retrieved
            # (only reported in verbose mode).
            valid_channels = [
                ch.name
                for ch in acquisition.channels.values()
                if acquisition_data.get_image_by_name(ch.name) is not None
            ]

            export_name = f"{session.name}_slide{acquisition.slide.id}_ROI{acquisition.id}"
            export_name = export_name.replace(" ", "_")
            export_name = export_name.replace(".", "_")
            export_name += ".ome.tiff"
            if verbose:
                print(f"{len(valid_channels)} channels in {export_name}", ", ".join(valid_channels))

            acquisition_data.save_ome_tiff(
                export / export_name,
                xml_metadata=mcd_xml,
                names=channels,
            )
    finally:
        # Release the underlying MCD file handle on success and on failure.
        mcd_parser.close()
Example #3
0
File: utils.py Project: bzrry/imc
def mcd_to_dir(
    mcd_file: Path,
    pannel_csv: Path = None,
    ilastik_output: bool = True,
    ilastik_channels: List[str] = None,
    output_dir: Path = None,
    output_format: str = "tiff",
    overwrite: bool = False,
    sample_name: str = None,
    partition_panels: bool = False,
    filter_full: bool = True,
    export_panoramas: bool = True,
    keep_original_roi_names: bool = False,
    allow_empty_rois: bool = True,
    only_crops: bool = False,
    n_crops: int = 5,
    crop_width: int = 500,
    crop_height: int = 500,
) -> None:
    """Export the acquisitions of an MCD file into a directory of image files.

    For every acquisition the hot-pixel-clipped stack is written to
    ``<output_dir>/tiffs`` as ``<prefix>_full.<tiff|ome.tiff>`` together with
    ``<prefix>_full.csv`` holding the channel labels. With `ilastik_output`,
    a 2x-upscaled HDF5 stack of the ilastik channels and `n_crops` random
    crops of it are written as well.

    Args:
        mcd_file: MCD file to read.
        pannel_csv: CSV read with its first column as index; rows with
            ``ilastik == 1`` select the ilastik channels when
            `ilastik_channels` is not given.
        ilastik_output: Whether to write the ilastik HDF5 stack and crops.
        ilastik_channels: Channel labels to export for ilastik.
        output_dir: Destination directory; defaults to ``imc_dir`` next to
            `mcd_file`.
        output_format: ``"tiff"`` or ``"ome-tiff"``. Any other value writes
            no full image.
        overwrite: Whether existing output files are replaced.
        sample_name: Prefix for output names; defaults to the session name.
        partition_panels: Not implemented; raises if True.
        filter_full: Drop channel labels that start with a digit or contain
            ``<EMPTY>`` from the labels used for the full image.
        export_panoramas: Whether to call `get_panorama_images` first.
        keep_original_roi_names: Build the prefix from the session name
            instead of `sample_name` plus a running two-digit index.
        allow_empty_rois: If True, an acquisition whose data cannot be read
            is printed and skipped instead of raising.
        only_crops: Skip writing the full image (the labels CSV is still
            written).
        n_crops: Number of random crops per acquisition.
        crop_width: Crop extent along the first spatial axis of the upscaled
            stack, as indexed by this function.
        crop_height: Crop extent along the second spatial axis of the
            upscaled stack, as indexed by this function.

    Raises:
        NotImplementedError: If `partition_panels` is True.
        ValueError: If neither `pannel_csv` nor `ilastik_channels` is given.
    """
    def get_dataframe_from_channels(mcd):
        # NOTE: `session` is read from the enclosing scope; it is only bound
        # once the MCD file has been parsed further below.
        return pd.DataFrame(
            [mcd.get_acquisition_channels(x) for x in session.acquisition_ids],
            index=session.acquisition_ids,
        )

    def all_channels_equal(mcd):
        chs = get_dataframe_from_channels(mcd)
        return all([(chs[c].value_counts() == mcd.n_acquisitions).all()
                    for c in chs.columns])

    def get_panel_partitions(mcd):
        chs = get_dataframe_from_channels(mcd)

        partitions = {k: set(k) for k in chs.drop_duplicates().index}
        for p in partitions:
            for _, row in chs.iterrows():
                print(p, row.name)
                if (row == chs.loc[list(partitions[p])[0]]).all():
                    partitions[p] = partitions[p].union(set([row.name]))
        return partitions.values()

    def clip_hot_pixels(img, hp_filter_shape=(3, 3), hp_threshold=0.0001):
        # Replace pixels that exceed the maximum of their neighbourhood
        # (centre excluded) by more than `hp_threshold` with that maximum.
        if hp_filter_shape[0] % 2 != 1 or hp_filter_shape[1] % 2 != 1:
            raise ValueError("Invalid hot pixel filter shape: %s" %
                             str(hp_filter_shape))
        hp_filter_footprint = np.ones(hp_filter_shape)
        hp_filter_footprint[int(hp_filter_shape[0] / 2),
                            int(hp_filter_shape[1] / 2)] = 0
        max_img = ndi.maximum_filter(img,
                                     footprint=hp_filter_footprint,
                                     mode="reflect")
        hp_mask = img - max_img > hp_threshold
        img = img.copy()
        img[hp_mask] = max_img[hp_mask]
        return img

    if partition_panels:
        raise NotImplementedError(
            "Partitioning sample per panel is not implemented yet.")

    if pannel_csv is None and ilastik_channels is None:
        raise ValueError(
            "One of `pannel_csv` or `ilastik_channels` must be given!")
    if ilastik_channels is None and pannel_csv is not None:
        panel = pd.read_csv(pannel_csv, index_col=0)
        ilastik_channels = panel.query("ilastik == 1").index.tolist()

    H5_YXC_AXISTAG = json.dumps({
        "axes": [
            {
                "key": "y",
                "typeFlags": 2,
                "resolution": 0,
                "description": "",
            },
            {
                "key": "x",
                "typeFlags": 2,
                "resolution": 0,
                "description": "",
            },
            {
                "key": "c",
                "typeFlags": 1,
                "resolution": 0,
                "description": "",
            },
        ]
    })

    if output_dir is None:
        output_dir = mcd_file.parent / "imc_dir"
    output_dir.mkdir(exist_ok=True, parents=True)
    dirs = ["tiffs"] + (["ilastik"] if ilastik_output else [])
    for _dir in dirs:
        (output_dir / _dir).mkdir(exist_ok=True)

    # Export panoramas
    if export_panoramas:
        get_panorama_images(
            mcd_file,
            output_file_prefix=output_dir / "Panorama",
            overwrite=overwrite,
        )

    # Parse MCD
    mcd = McdParser(mcd_file)
    try:
        session = mcd.session

        if sample_name is None:
            sample_name = session.name

        for i, ac_id in enumerate(session.acquisition_ids):
            print(ac_id, end="\t")
            try:
                ac = mcd.get_acquisition_data(ac_id)
            except Exception as e:  # imctools.io.abstractparserbase.AcquisitionError
                if allow_empty_rois:
                    print(e)
                    continue
                raise

            # Get output prefix
            # NOTE(review): the `prefix + "..."` expressions below need a
            # `Path` type that supports `+` with str (stdlib `pathlib.Path`
            # does not) - presumably a project subclass; confirm.
            # NOTE(review): with `keep_original_roi_names` the prefix does not
            # include the acquisition id, so all acquisitions share one
            # prefix - confirm this is intended.
            if keep_original_roi_names:
                prefix = (output_dir / "tiffs" /
                          (session.name.replace(" ", "_") + "_ac"))
            else:
                prefix = (output_dir / "tiffs" /
                          (sample_name + "-" + str(i + 1).zfill(2)))

            # Bound unconditionally: the labels CSV below needs it even when
            # `only_crops` skips the full image.
            p = prefix + "_full."

            # Skip if not overwrite
            file_ending = "ome.tiff" if output_format == "ome-tiff" else "tiff"
            if (p + file_ending).exists() and not overwrite:
                print(
                    "TIFF images exist and overwrite is set to `False`. Continuing."
                )
                continue

            # Filter channels
            channel_labels = build_channel_name(ac.channel_labels,
                                                ac.channel_names)
            to_exp = channel_labels[channel_labels.isin(ilastik_channels)]
            to_exp_ind = [
                ac.channel_masses.index(y)
                for y in to_exp.str.extract(r".*\(..(\d+)\)")[0]
            ]
            assert to_exp_ind == to_exp.index.tolist()

            if filter_full:
                # remove background and empty channels
                # TODO: find way to do this more systematically
                channel_labels = channel_labels[~(
                    channel_labels.str.contains(r"^\d")
                    | channel_labels.str.contains("<EMPTY>"))].reset_index(
                        drop=True)

            # Filter hot pixels
            ac._image_data = np.asarray(
                [clip_hot_pixels(x) for x in ac.image_data])

            # Save full image
            if not only_crops:
                if output_format == "tiff":
                    if (overwrite) or not (p + file_ending).exists():
                        ac.save_tiff(
                            p + file_ending,
                            names=channel_labels.str.extract(r"\((.*)\)")[0],
                        )
                elif output_format == "ome-tiff":
                    if (overwrite) or not (p + file_ending).exists():
                        ac.save_ome_tiff(
                            p + file_ending,
                            names=channel_labels.str.extract(r"\((.*)\)")[0],
                            xml_metadata=mcd.get_mcd_xml(),
                        )
            # Save channel labels for the stack
            if (overwrite) or not (p + "csv").exists():
                channel_labels.to_csv(p + "csv")

            if not ilastik_output:
                continue

            # Make input for ilastik training
            # # zoom 2x
            s = tuple(x * 2 for x in ac.image_data.shape[1:])
            full = np.moveaxis(
                np.asarray([resize(x, s) for x in ac.image_data[to_exp_ind]]),
                0, -1)

            # # Save input for ilastik prediction
            with h5py.File(prefix + "_ilastik_s2.h5", mode="w") as handle:
                d = handle.create_dataset("stacked_channels", data=full)
                d.attrs["axistags"] = H5_YXC_AXISTAG

            # # random crops
            iprefix = (output_dir / "ilastik" /
                       (sample_name.replace(" ", "_") + "_ac"))
            # # # make sure the crop fits; the axes checked here must match
            # # # the axes sliced below (axis 0 <-> crop_width,
            # # # axis 1 <-> crop_height)
            if (full.shape[0] < crop_width) or (full.shape[1] < crop_height):
                msg = "Image is smaller than the requested crop size for ilastik training."
                print(msg)
                continue
            for _ in range(n_crops):
                # `+ 1` keeps the range non-empty when the image is exactly
                # as large as the crop (the only valid offset is then 0).
                x = np.random.choice(range(s[0] - crop_width + 1))
                y = np.random.choice(range(s[1] - crop_height + 1))
                crop = full[x:(x + crop_width), y:(y + crop_height), :]
                assert crop.shape == (crop_width, crop_height, len(to_exp))
                with h5py.File(
                        iprefix +
                        f"_ilastik_x{x}_y{y}_w{crop_width}_h{crop_height}.h5",
                        mode="w",
                ) as handle:
                    d = handle.create_dataset("stacked_channels", data=crop)
                    d.attrs["axistags"] = H5_YXC_AXISTAG

        print("")  # add a newline to the tabs
    finally:
        # Close the MCD file even when an acquisition fails part-way through.
        mcd.close()