import pandas as pd

import deepprofiler.dataset.metadata
import deepprofiler.dataset.target


def read_dataset(config):
    # Read metadata and split dataset in training and validation
    metadata = deepprofiler.dataset.metadata.Metadata(config["paths"]["index"], dtype=None)
    if config["prepare"]["compression"]["implement"]:
        # Point the index at the compressed copies of the images
        metadata.data.replace({".tiff": ".png", ".tif": ".png"}, inplace=True, regex=True)

    # Add outlines if specified
    outlines = None
    if "outlines" in config["prepare"] and config["prepare"]["outlines"] != "":
        df = pd.read_csv(config["paths"]["metadata"] + "/outlines.csv")
        metadata.mergeOutlines(df)
        outlines = config["paths"]["root"] + "inputs/outlines/"

    print(metadata.data.info())

    # Split training data
    split_field = config["train"]["partition"]["split_field"]
    trainingFilter = lambda df: df[split_field].isin(config["train"]["partition"]["training_values"])
    validationFilter = lambda df: df[split_field].isin(config["train"]["partition"]["validation_values"])
    metadata.splitMetadata(trainingFilter, validationFilter)

    # Create a dataset
    keyGen = lambda r: "{}/{}-{}".format(r["Metadata_Plate"], r["Metadata_Well"], r["Metadata_Site"])
    dset = ImageDataset(
        metadata,
        config["dataset"]["metadata"]["label_field"],
        config["dataset"]["images"]["channels"],
        config["paths"]["images"],
        keyGen,
        config
    )

    # Add training targets
    for t in config["train"]["partition"]["targets"]:
        new_target = deepprofiler.dataset.target.MetadataColumnTarget(t, metadata.data[t].unique())
        dset.add_target(new_target)

    # Activate outlines for masking if needed
    if config["dataset"]["locations"]["mask_objects"]:
        dset.outlines = outlines

    dset.prepare_training_locations()

    return dset
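# For reference, the sketch below lists only the configuration keys that the
# version of read_dataset above actually touches. It is an illustrative,
# hypothetical skeleton: every path and value is a placeholder, not a real
# DeepProfiler default.
example_config = {
    "paths": {
        "root": "/project/",                             # prefix for the outlines directory
        "index": "/project/inputs/metadata/index.csv",   # metadata index read into Metadata
        "metadata": "/project/inputs/metadata",          # expects outlines.csv here when outlines are used
        "images": "/project/inputs/images",
    },
    "prepare": {
        "compression": {"implement": False},             # True rewrites .tif/.tiff extensions to .png
        "outlines": "",                                  # any non-empty string enables outline merging
    },
    "train": {
        "partition": {
            "split_field": "Metadata_Plate",
            "training_values": ["Plate1", "Plate2"],
            "validation_values": ["Plate3"],
            "targets": ["Compound"],                     # one MetadataColumnTarget per entry
        },
    },
    "dataset": {
        "metadata": {"label_field": "Compound"},
        "images": {"channels": ["DNA", "RNA", "ER"]},
        "locations": {"mask_objects": False},
    },
}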
def read_dataset(config):
    # Read metadata and split dataset in training and validation
    metadata = deepprofiler.dataset.metadata.Metadata(config["paths"]["index"], dtype=None)

    # Add outlines if specified
    outlines = None
    if "outlines" in config["prepare"] and config["prepare"]["outlines"] != "":
        df = pd.read_csv(config["paths"]["locations"] + "/outlines.csv")
        metadata.mergeOutlines(df)
        outlines = config["paths"]["locations"]

    print(metadata.data.info())

    # Split training data
    split_field = config["train"]["dset"]["split_field"]
    trainingFilter = lambda df: df[split_field].isin(config["train"]["dset"]["training_values"])
    validationFilter = lambda df: df[split_field].isin(config["train"]["dset"]["validation_values"])
    metadata.splitMetadata(trainingFilter, validationFilter)

    # Create a dataset
    keyGen = lambda r: "{}/{}-{}".format(r["Metadata_Plate"], r["Metadata_Well"], r["Metadata_Site"])
    dset = ImageDataset(
        metadata,
        config["train"]["sampling"]["field"],
        config["prepare"]["images"]["channels"],
        config["paths"]["images"],
        keyGen
    )

    # Add training targets
    for t in config["train"]["dset"]["targets"]:
        new_target = deepprofiler.dataset.target.MetadataColumnTarget(t, metadata.data[t].unique())
        dset.add_target(new_target)

    # Activate outlines for masking if needed
    if config["train"]["dset"]["mask_objects"]:
        dset.outlines = outlines

    return dset
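# The two variants above expect slightly different configuration layouts, but
# both are called the same way. The driver below is a minimal, hypothetical
# usage sketch: it assumes the configuration is stored as JSON on disk and
# that read_dataset is importable from deepprofiler.dataset.image_dataset;
# the file name and import path are assumptions, not guaranteed by the code above.
import json

from deepprofiler.dataset.image_dataset import read_dataset  # assumed module location


def build_dataset(config_file="config.json"):
    # Load the experiment configuration and construct the ImageDataset
    with open(config_file, "r") as handle:
        config = json.load(handle)
    return read_dataset(config)


if __name__ == "__main__":
    dset = build_dataset()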
import os


def test_split_metadata(metadata, dataframe, out_dir):
    # metadata, dataframe and out_dir are pytest fixtures
    metadata.loadSingle(os.path.join(out_dir, "test.csv"), "default", int)
    train_rule = lambda data: data["Metadata_Plate"] < 50
    val_rule = lambda data: data["Metadata_Plate"] >= 50
    metadata.splitMetadata(train_rule, val_rule)
    # Every row must land in exactly one partition
    assert len(metadata.train) + len(metadata.val) == len(metadata.data)
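# To make the behavior the test asserts concrete, the self-contained pandas
# sketch below reproduces the same partitioning rule without the project's
# Metadata class: two complementary boolean filters over Metadata_Plate, so
# every row lands in exactly one split and the lengths add up.
import pandas as pd

frame = pd.DataFrame({"Metadata_Plate": [10, 25, 49, 50, 75]})

train_rule = lambda data: data["Metadata_Plate"] < 50
val_rule = lambda data: data["Metadata_Plate"] >= 50

train = frame[train_rule(frame)]
val = frame[val_rule(frame)]

assert len(train) + len(val) == len(frame)  # 3 training rows + 2 validation rows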