Example #1
import pandas as pd

import deepprofiler.dataset.metadata
import deepprofiler.dataset.target
# ImageDataset is assumed to be defined in the same module as this function


def read_dataset(config):
    # Read metadata and split dataset in training and validation
    metadata = deepprofiler.dataset.metadata.Metadata(config["paths"]["index"],
                                                      dtype=None)
    if config["prepare"]["compression"]["implement"]:
        metadata.data.replace({
            '.tiff': '.png',
            '.tif': '.png'
        },
                              inplace=True,
                              regex=True)

    # Add outlines if specified
    outlines = None
    if "outlines" in config["prepare"].keys(
    ) and config["prepare"]["outlines"] != "":
        df = pd.read_csv(config["paths"]["metadata"] + "/outlines.csv")
        metadata.mergeOutlines(df)
        outlines = config["paths"]["root"] + "inputs/outlines/"

    print(metadata.data.info())

    # Split training data
    split_field = config["train"]["partition"]["split_field"]
    trainingFilter = lambda df: df[split_field].isin(config["train"][
        "partition"]["training_values"])
    validationFilter = lambda df: df[split_field].isin(config["train"][
        "partition"]["validation_values"])
    metadata.splitMetadata(trainingFilter, validationFilter)

    # Create a dataset
    keyGen = lambda r: "{}/{}-{}".format(r["Metadata_Plate"], r["Metadata_Well"], r["Metadata_Site"])
    dset = ImageDataset(metadata, config["dataset"]["metadata"]["label_field"],
                        config["dataset"]["images"]["channels"],
                        config["paths"]["images"], keyGen, config)

    # Add training targets
    for t in config["train"]["partition"]["targets"]:
        new_target = deepprofiler.dataset.target.MetadataColumnTarget(t, metadata.data[t].unique())
        dset.add_target(new_target)

    # Activate outlines for masking if needed
    if config["dataset"]["locations"]["mask_objects"]:
        dset.outlines = outlines

    dset.prepare_training_locations()

    return dset
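
For reference, here is a minimal sketch of the config dictionary this variant expects, reconstructed purely from the keys the function reads above; every value is a placeholder, not a real DeepProfiler configuration.

# Hypothetical values; only the key layout is taken from read_dataset above.
config = {
    "paths": {
        "index": "/project/inputs/metadata/index.csv",
        "metadata": "/project/inputs/metadata",
        "root": "/project/",
        "images": "/project/inputs/images",
    },
    "prepare": {
        "compression": {"implement": True},
        "outlines": "",  # a non-empty string activates outline merging
    },
    "train": {
        "partition": {
            "split_field": "Metadata_Split",
            "training_values": ["Training"],
            "validation_values": ["Validation"],
            "targets": ["Metadata_Label"],
        },
    },
    "dataset": {
        "metadata": {"label_field": "Metadata_Label"},
        "images": {"channels": ["DNA", "RNA", "ER"]},
        "locations": {"mask_objects": False},
    },
}

# dset = read_dataset(config)  # requires the index CSV and images to exist on disk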
Example #2
import pandas as pd

import deepprofiler.dataset.metadata
import deepprofiler.dataset.target
# ImageDataset is assumed to be defined in the same module as this function


def read_dataset(config):
    # Read metadata and split dataset in training and validation
    metadata = deepprofiler.dataset.metadata.Metadata(config["paths"]["index"], dtype=None)

    # Add outlines if specified
    outlines = None
    if "outlines" in config["prepare"].keys() and config["prepare"]["outlines"] != "":
        df = pd.read_csv(config["paths"]["locations"] + "/outlines.csv")
        metadata.mergeOutlines(df)
        outlines = config["paths"]["locations"]

    print(metadata.data.info())

    # Split training data
    split_field = config["train"]['dset']["split_field"]
    trainingFilter = lambda df: df[split_field].isin(config['train']["dset"]["training_values"])
    validationFilter = lambda df: df[split_field].isin(config['train']["dset"]["validation_values"])
    metadata.splitMetadata(trainingFilter, validationFilter)

    # Create a dataset
    keyGen = lambda r: "{}/{}-{}".format(r["Metadata_Plate"], r["Metadata_Well"], r["Metadata_Site"])
    dset = ImageDataset(
        metadata,
        config['train']["sampling"]["field"],
        config['prepare']["images"]["channels"],
        config["paths"]["images"],
        keyGen
    )

    # Add training targets
    for t in config['train']["dset"]["targets"]:
        new_target = deepprofiler.dataset.target.MetadataColumnTarget(t, metadata.data[t].unique())
        dset.add_target(new_target)

    # Activate outlines for masking if needed
    if config['train']["dset"]["mask_objects"]:
        dset.outlines = outlines

    return dset
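
This variant reads a different, presumably older key layout; a matching sketch with placeholder values, again derived only from the keys accessed above:

# Hypothetical values; only the key layout is taken from the variant above.
config = {
    "paths": {
        "index": "/project/inputs/metadata/index.csv",
        "locations": "/project/inputs/locations",
        "images": "/project/inputs/images",
    },
    "prepare": {
        "outlines": "",  # a non-empty string activates outline merging
        "images": {"channels": ["DNA", "RNA", "ER"]},
    },
    "train": {
        "sampling": {"field": "Metadata_Label"},
        "dset": {
            "split_field": "Metadata_Split",
            "training_values": ["Training"],
            "validation_values": ["Validation"],
            "targets": ["Metadata_Label"],
            "mask_objects": False,
        },
    },
}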
Example #3
import os


# metadata, dataframe, and out_dir are pytest fixtures; a sketch follows the test
def test_split_metadata(metadata, dataframe, out_dir):
    metadata.loadSingle(os.path.join(out_dir, 'test.csv'), 'default', int)
    train_rule = lambda data: data['Metadata_Plate'] < 50
    val_rule = lambda data: data['Metadata_Plate'] >= 50
    metadata.splitMetadata(train_rule, val_rule)
    assert len(metadata.train) + len(metadata.val) == len(metadata.data)
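
The fixtures this test relies on are not shown in the excerpt. Below is a minimal sketch of what they could look like, assuming Metadata() can be constructed without arguments; the fixture bodies and all data values are assumptions, only the fixture names come from the test signature.

import os
import pandas as pd
import pytest
import deepprofiler.dataset.metadata


@pytest.fixture()
def out_dir(tmp_path):
    # pytest's built-in tmp_path fixture provides a fresh temporary directory
    return str(tmp_path)


@pytest.fixture()
def dataframe():
    # Plates 0..99, so the < 50 / >= 50 rules split the rows evenly
    return pd.DataFrame({"Metadata_Plate": list(range(100)),
                         "Metadata_Well": ["A01"] * 100,
                         "Metadata_Site": [1] * 100})


@pytest.fixture()
def metadata(dataframe, out_dir):
    # Write the CSV that the test loads via metadata.loadSingle(...)
    dataframe.to_csv(os.path.join(out_dir, "test.csv"), index=False)
    return deepprofiler.dataset.metadata.Metadata()  # assumed no-arg constructor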