def setUp(self):
    """Build a fake filesystem holding empty metadata files for three samples."""
    self.setUpPyfakefs()
    self._data_dir = '/my/base/dir'
    # Sorted so tests can rely on a deterministic ordering of the paths.
    self._metadata_paths = sorted(
        fi.get_metadata_path(self._data_dir, sid)
        for sid in ('foo', 'bar', 'baz')
    )
    for path in self._metadata_paths:
        self.fs.create_file(path)
def main():
    """Validate every dataset split: disjointness, file presence, label counts.

    Raises:
        IOError: If the base data folder or dataset definition folder is missing.
        ValueError: If splits overlap or a sample lacks its image/metadata file.
    """
    data_dir = paths.BasePaths.DATA_DIR
    if not os.path.exists(data_dir):
        raise IOError('Base data folder {} does not exist'.format(data_dir))

    dataset_def_dir = paths.BasePaths.DATASET_DEF_DIR
    if not os.path.exists(dataset_def_dir):
        raise IOError('Dataset definition folder {} '
                      'does not exist'.format(dataset_def_dir))

    # Splits validated so far, used to detect cross-split sample overlap.
    checked_datasets = {}
    for split_name, filename in constants.DatasetFilenames.items():
        print('Dataset {}'.format(split_name))

        dataset_path = os.path.join(dataset_def_dir, filename)
        if not os.path.exists(dataset_path):
            print('  No dataset {} at path {}'.format(split_name, dataset_path))
            continue

        with open(dataset_path, 'r') as f:
            sample_ids = set(json.load(f))
        print('  {} samples'.format(len(sample_ids)))

        # A sample id must belong to at most one split.
        for other_name, other_samples in checked_datasets.items():
            intersec = sample_ids & other_samples
            if len(intersec) > 0:
                raise ValueError('{} samples are both in {} '
                                 'and {}'.format(len(intersec), other_name,
                                                 split_name))
        checked_datasets[split_name] = sample_ids

        # Group samples by label while verifying files exist on disk.
        class_samples = {}
        for sample_id in sample_ids:
            image_path = fi.get_image_path(data_dir, sample_id)
            if not os.path.exists(image_path):
                raise ValueError('No image for sample {} '
                                 'at {}'.format(sample_id, image_path))

            meta_path = fi.get_metadata_path(data_dir, sample_id)
            if not os.path.exists(meta_path):
                raise ValueError('No metadata file for sample {} '
                                 'at {}'.format(sample_id, meta_path))
            with open(meta_path, 'r') as f:
                meta = json.load(f)
            label = fi.MetadataReader.get_label(meta)
            class_samples.setdefault(label, set()).add(sample_id)

        for label, label_samples in class_samples.items():
            print('  Label {}: {} samples'.format(label, len(label_samples)))
def setUp(self):
    """Build a fake filesystem with labeled metadata files for each sample."""
    self.setUpPyfakefs()
    self._data_dir = '/my/base/dir'
    self._class_samples = {
        0: {'foo', 'bar'},
        1: {'baz'},
    }
    for label, ids in self._class_samples.items():
        for sid in ids:
            # Each metadata file records the sample's id and class label.
            contents = json.dumps({
                constants.MetadataFields.ID: sid,
                constants.MetadataFields.LABEL: label,
            })
            self.fs.create_file(
                fi.get_metadata_path(self._data_dir, sid),
                contents=contents)
def _images_and_labels_from_dataset_definition(dataset_definition):
    """Resolve image paths and labels for every sample id in a dataset.

    Args:
        dataset_definition: Iterable of sample ids.

    Returns:
        A (image_paths, labels) pair of parallel lists, one entry per sample.
    """
    base_dir = paths.BasePaths.DATA_DIR
    image_paths = []
    labels = []
    for sid in dataset_definition:
        # Image loading itself is deferred (TF will read the file later);
        # only the path is collected here.
        image_paths.append(fi.get_image_path(base_dir, sid))

        # The label comes from the sample's metadata file.
        with open(fi.get_metadata_path(base_dir, sid), 'r') as f:
            meta = json.load(f)
        labels.append(fi.MetadataReader.get_label(meta))
    return image_paths, labels
def _save_sample_metadata(data_dir, sample_id, label, dry=False):
    """Stores a sample metadata file on disk.

    Args:
        data_dir (str): Path to the base data folder.
        sample_id (str): Identifies the sample.
        label (int): Class index of the sample.
        dry (bool, optional): If True, no change is applied to the file
            system. Defaults to False.
    """
    meta_path = fi.get_metadata_path(data_dir, sample_id)
    meta_dir = os.path.dirname(meta_path)
    meta = {
        MetadataFields.ID: sample_id,
        MetadataFields.LABEL: label,
    }
    if not dry:
        # Ensure the destination folder exists before writing.
        paths.mkdir_if_not_exists(meta_dir)
        with open(meta_path, 'w') as f:
            json.dump(meta, f, indent=4)
def test_success(self):
    """A metadata path is derived from the base dir and the sample id."""
    meta_path = fi.get_metadata_path(data_dir=self._BASE_DIR,
                                     sample_id=self._SAMPLE_ID)
    self.assertEqual(meta_path, '/my/base/dir/8a/33/foo.json')