コード例 #1
0
ファイル: image_dataset.py プロジェクト: tlouskt/DeepProfiler
def read_dataset(config):
    """Build an ``ImageDataset`` with training locations from ``config``.

    Steps, in order:

    1. Load the metadata index named by ``config["paths"]["index"]``.
    2. If ``config["prepare"]["compression"]["implement"]`` is truthy,
       rewrite ``.tif`` / ``.tiff`` extensions in the metadata to ``.png``.
    3. If ``config["prepare"]["outlines"]`` is present and not ``""``, merge
       ``outlines.csv`` (read from ``config["paths"]["metadata"]``) into the
       metadata and remember the outlines directory under
       ``config["paths"]["root"]``.
    4. Split the metadata into training and validation parts using the
       ``config["train"]["partition"]`` settings.
    5. Create the dataset, register one ``MetadataColumnTarget`` per entry
       of ``config["train"]["partition"]["targets"]``, optionally attach the
       outlines directory, and call ``prepare_training_locations()``.

    Args:
        config: Nested configuration dictionary providing the keys listed
            above, plus ``config["dataset"]`` (``metadata.label_field``,
            ``images.channels``, ``locations.mask_objects``) and
            ``config["paths"]["images"]``.

    Returns:
        The configured ``ImageDataset`` instance.
    """
    # Local import keeps this block self-contained regardless of the
    # module-level imports of the enclosing file.
    import os

    paths = config["paths"]
    prepare = config["prepare"]

    # Read metadata and split dataset in training and validation
    metadata = deepprofiler.dataset.metadata.Metadata(paths["index"],
                                                      dtype=None)
    if prepare["compression"]["implement"]:
        # The dot must be escaped: with regex=True an unescaped "." matches
        # any character, so e.g. "motif" would be rewritten as well.
        # "\.tiff?" covers both ".tiff" and ".tif" in a single pass.
        metadata.data.replace({r"\.tiff?": ".png"}, regex=True, inplace=True)

    # Add outlines if specified
    outlines = None
    if prepare.get("outlines", "") != "":
        outlines_table = pd.read_csv(
            os.path.join(paths["metadata"], "outlines.csv"))
        metadata.mergeOutlines(outlines_table)
        # os.path.join keeps the trailing slash of the second component and
        # also works when the root path has no trailing separator.
        outlines = os.path.join(paths["root"], "inputs/outlines/")

    # DataFrame.info() prints its summary itself and returns None, so it
    # must not be wrapped in print() (that would emit a stray "None").
    metadata.data.info()

    # Split training data
    partition = config["train"]["partition"]
    split_field = partition["split_field"]

    def training_filter(df):
        return df[split_field].isin(partition["training_values"])

    def validation_filter(df):
        return df[split_field].isin(partition["validation_values"])

    metadata.splitMetadata(training_filter, validation_filter)

    # Create a dataset
    def key_gen(r):
        # Key format: "<plate>/<well>-<site>"
        return "{}/{}-{}".format(r["Metadata_Plate"], r["Metadata_Well"],
                                 r["Metadata_Site"])

    dset = ImageDataset(metadata,
                        config["dataset"]["metadata"]["label_field"],
                        config["dataset"]["images"]["channels"],
                        paths["images"], key_gen, config)

    # Add training targets: one per configured column, built from the
    # distinct values found in that metadata column.
    for target_name in partition["targets"]:
        dset.add_target(
            deepprofiler.dataset.target.MetadataColumnTarget(
                target_name, metadata.data[target_name].unique()))

    # Activate outlines for masking if needed (stays None when no outlines
    # were configured above).
    if config["dataset"]["locations"]["mask_objects"]:
        dset.outlines = outlines

    dset.prepare_training_locations()

    return dset
コード例 #2
0
def test_merge_outlines(metadata, dataframe, out_dir):
    """Check ``mergeOutlines`` against a plain pandas merge on the site keys.

    Loads ``test.csv`` from ``out_dir`` into ``metadata``, merges an outlines
    table built from ``__rand_array()`` values, and asserts that
    ``metadata.data`` equals ``pd.merge`` of itself with that table on the
    plate / well / site columns.

    NOTE(review): ``dataframe`` is not used directly; presumably it is a
    fixture that prepares ``test.csv`` -- confirm against the fixture module.
    """
    key_columns = ["Metadata_Plate", "Metadata_Well", "Metadata_Site"]

    csv_path = os.path.join(out_dir, 'test.csv')
    metadata.loadSingle(csv_path, 'default', int)

    # One helper-generated integer column per key, in plate/well/site order.
    outline_frame = pd.DataFrame(
        {column: __rand_array() for column in key_columns}, dtype=int)
    metadata.mergeOutlines(outline_frame)

    expected = pd.merge(metadata.data, outline_frame, on=key_columns)
    pd.testing.assert_frame_equal(metadata.data, expected)
コード例 #3
0
def read_dataset(config):
    """Build an ``ImageDataset`` from ``config`` (``train.dset`` layout).

    Steps, in order:

    1. Load the metadata index named by ``config["paths"]["index"]``.
    2. If ``config["prepare"]["outlines"]`` is present and not ``""``, merge
       ``outlines.csv`` (read from ``config["paths"]["locations"]``) into the
       metadata and remember that directory as the outlines location.
    3. Split the metadata into training and validation parts using the
       ``config["train"]["dset"]`` settings.
    4. Create the dataset, register one ``MetadataColumnTarget`` per entry
       of ``config["train"]["dset"]["targets"]`` and optionally attach the
       outlines location.

    Args:
        config: Nested configuration dictionary providing the keys listed
            above, plus ``config["train"]["sampling"]["field"]``,
            ``config["prepare"]["images"]["channels"]`` and
            ``config["paths"]["images"]``.

    Returns:
        The configured ``ImageDataset`` instance.
    """
    # Local import keeps this block self-contained regardless of the
    # module-level imports of the enclosing file.
    import os

    paths = config["paths"]

    # Read metadata and split dataset in training and validation
    metadata = deepprofiler.dataset.metadata.Metadata(paths["index"], dtype=None)

    # Add outlines if specified
    outlines = None
    if config["prepare"].get("outlines", "") != "":
        outlines_table = pd.read_csv(os.path.join(paths["locations"], "outlines.csv"))
        metadata.mergeOutlines(outlines_table)
        outlines = paths["locations"]

    # DataFrame.info() prints its summary itself and returns None, so it
    # must not be wrapped in print() (that would emit a stray "None").
    metadata.data.info()

    # Split training data
    dset_config = config['train']["dset"]
    split_field = dset_config["split_field"]

    def training_filter(df):
        return df[split_field].isin(dset_config["training_values"])

    def validation_filter(df):
        return df[split_field].isin(dset_config["validation_values"])

    metadata.splitMetadata(training_filter, validation_filter)

    # Create a dataset
    def key_gen(r):
        # Key format: "<plate>/<well>-<site>"
        return "{}/{}-{}".format(r["Metadata_Plate"], r["Metadata_Well"], r["Metadata_Site"])

    dset = ImageDataset(
        metadata,
        config['train']["sampling"]["field"],
        config['prepare']["images"]["channels"],
        paths["images"],
        key_gen
    )

    # Add training targets: one per configured column, built from the
    # distinct values found in that metadata column.
    for target_name in dset_config["targets"]:
        dset.add_target(
            deepprofiler.dataset.target.MetadataColumnTarget(
                target_name, metadata.data[target_name].unique()))

    # Activate outlines for masking if needed (stays None when no outlines
    # were configured above).
    if dset_config["mask_objects"]:
        dset.outlines = outlines

    return dset