import pandas as pd

import deepprofiler.dataset.metadata
import deepprofiler.dataset.target

# ImageDataset is assumed to be defined in, or imported into, this module.


def read_dataset(config):
    # Read metadata and split dataset in training and validation
    metadata = deepprofiler.dataset.metadata.Metadata(config["paths"]["index"], dtype=None)
    if config["prepare"]["compression"]["implement"]:
        # Compressed datasets store PNG copies of the original TIFF images
        metadata.data.replace({".tiff": ".png", ".tif": ".png"}, inplace=True, regex=True)

    # Add outlines if specified
    outlines = None
    if "outlines" in config["prepare"].keys() and config["prepare"]["outlines"] != "":
        df = pd.read_csv(config["paths"]["metadata"] + "/outlines.csv")
        metadata.mergeOutlines(df)
        outlines = config["paths"]["root"] + "inputs/outlines/"

    metadata.data.info()  # info() writes the column summary to stdout itself

    # Split training data
    split_field = config["train"]["partition"]["split_field"]
    trainingFilter = lambda df: df[split_field].isin(config["train"]["partition"]["training_values"])
    validationFilter = lambda df: df[split_field].isin(config["train"]["partition"]["validation_values"])
    metadata.splitMetadata(trainingFilter, validationFilter)

    # Create a dataset
    keyGen = lambda r: "{}/{}-{}".format(r["Metadata_Plate"], r["Metadata_Well"], r["Metadata_Site"])
    dset = ImageDataset(
        metadata,
        config["dataset"]["metadata"]["label_field"],
        config["dataset"]["images"]["channels"],
        config["paths"]["images"],
        keyGen,
        config
    )

    # Add training targets
    for t in config["train"]["partition"]["targets"]:
        new_target = deepprofiler.dataset.target.MetadataColumnTarget(t, metadata.data[t].unique())
        dset.add_target(new_target)

    # Activate outlines for masking if needed
    if config["dataset"]["locations"]["mask_objects"]:
        dset.outlines = outlines

    dset.prepare_training_locations()
    return dset
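
# A minimal, hypothetical config sketch covering only the keys this version of
# read_dataset touches; the paths, channel names, and target columns below are
# placeholder assumptions, not settings from a real project.
EXAMPLE_CONFIG = {
    "paths": {
        "index": "/project/inputs/metadata/index.csv",
        "metadata": "/project/inputs/metadata",
        "root": "/project/",
        "images": "/project/inputs/images",
    },
    "prepare": {
        "compression": {"implement": False},
        "outlines": "",
    },
    "train": {
        "partition": {
            "split_field": "Split",
            "training_values": ["Training"],
            "validation_values": ["Validation"],
            "targets": ["Compound"],
        },
    },
    "dataset": {
        "metadata": {"label_field": "Compound"},
        "images": {"channels": ["DNA", "RNA", "ER"]},
        "locations": {"mask_objects": False},
    },
}

# dset = read_dataset(EXAMPLE_CONFIG)  # needs the index CSV to exist on disk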
import os

import pandas as pd


def test_merge_outlines(metadata, dataframe, out_dir):
    metadata.loadSingle(os.path.join(out_dir, "test.csv"), "default", int)
    outlines = pd.DataFrame({
        "Metadata_Plate": __rand_array(),
        "Metadata_Well": __rand_array(),
        "Metadata_Site": __rand_array()
    }, dtype=int)
    metadata.mergeOutlines(outlines)
    # Merging outlines into the metadata should be equivalent to an inner join
    # on the plate/well/site keys
    merged = pd.merge(metadata.data, outlines, on=["Metadata_Plate", "Metadata_Well", "Metadata_Site"])
    pd.testing.assert_frame_equal(metadata.data, merged)
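
# The test above relies on pytest fixtures (metadata, dataframe, out_dir) and a
# __rand_array helper defined elsewhere in the test module; the definitions
# below are a plausible sketch of those dependencies, not the project's actual
# conftest.
import random

import numpy as np
import pytest

import deepprofiler.dataset.metadata


def __rand_array():
    # Twelve distinct small integers to populate the merge-key columns
    return np.array(random.sample(range(100), 12))


@pytest.fixture
def out_dir(tmpdir):
    return str(tmpdir.mkdir("test"))


@pytest.fixture
def dataframe(out_dir):
    # Writes the test.csv that loadSingle reads in the test above
    df = pd.DataFrame({
        "Metadata_Plate": __rand_array(),
        "Metadata_Well": __rand_array(),
        "Metadata_Site": __rand_array()
    }, dtype=int)
    df.to_csv(os.path.join(out_dir, "test.csv"), index=False)
    return df


@pytest.fixture
def metadata():
    return deepprofiler.dataset.metadata.Metadata()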
# Variant of read_dataset that reads an alternative config schema
# (train.dset / train.sampling / prepare.images instead of train.partition / dataset)
def read_dataset(config):
    # Read metadata and split dataset in training and validation
    metadata = deepprofiler.dataset.metadata.Metadata(config["paths"]["index"], dtype=None)

    # Add outlines if specified
    outlines = None
    if "outlines" in config["prepare"].keys() and config["prepare"]["outlines"] != "":
        df = pd.read_csv(config["paths"]["locations"] + "/outlines.csv")
        metadata.mergeOutlines(df)
        outlines = config["paths"]["locations"]

    metadata.data.info()  # info() writes the column summary to stdout itself

    # Split training data
    split_field = config["train"]["dset"]["split_field"]
    trainingFilter = lambda df: df[split_field].isin(config["train"]["dset"]["training_values"])
    validationFilter = lambda df: df[split_field].isin(config["train"]["dset"]["validation_values"])
    metadata.splitMetadata(trainingFilter, validationFilter)

    # Create a dataset
    keyGen = lambda r: "{}/{}-{}".format(r["Metadata_Plate"], r["Metadata_Well"], r["Metadata_Site"])
    dset = ImageDataset(
        metadata,
        config["train"]["sampling"]["field"],
        config["prepare"]["images"]["channels"],
        config["paths"]["images"],
        keyGen
    )

    # Add training targets
    for t in config["train"]["dset"]["targets"]:
        new_target = deepprofiler.dataset.target.MetadataColumnTarget(t, metadata.data[t].unique())
        dset.add_target(new_target)

    # Activate outlines for masking if needed
    if config["train"]["dset"]["mask_objects"]:
        dset.outlines = outlines

    return dset
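
# A minimal, hypothetical config sketch for the variant above, covering only
# the keys it reads; the values are placeholder assumptions.
EXAMPLE_CONFIG_V2 = {
    "paths": {
        "index": "/project/inputs/metadata/index.csv",
        "locations": "/project/inputs/locations",
        "images": "/project/inputs/images",
    },
    "prepare": {
        "outlines": "",
        "images": {"channels": ["DNA", "RNA", "ER"]},
    },
    "train": {
        "sampling": {"field": "Compound"},
        "dset": {
            "split_field": "Split",
            "training_values": ["Training"],
            "validation_values": ["Validation"],
            "targets": ["Compound"],
            "mask_objects": False,
        },
    },
}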