Exemple #1
0
    def setUp(self):
        """Build a fake file system with one metadata file per sample.

        The metadata paths of the samples 'foo', 'bar' and 'baz' are stored,
        sorted, in ``self._metadata_paths`` and a file is created at each of
        them under the base folder ``self._data_dir``.
        """
        self.setUpPyfakefs()
        self._data_dir = '/my/base/dir'

        # Resolve where each sample keeps its metadata, then order the paths.
        meta_paths = [
            fi.get_metadata_path(self._data_dir, name)
            for name in ('foo', 'bar', 'baz')
        ]
        meta_paths.sort()
        self._metadata_paths = meta_paths

        # Create a file at every metadata location.
        for path in self._metadata_paths:
            self.fs.create_file(path)
def main():
    """Check the dataset definition files against the base data folder.

    For each entry of ``constants.DatasetFilenames`` whose definition file
    exists, the sample ids are loaded and the following is verified:

    * no sample id is shared with a previously checked dataset;
    * every sample has an image file and a metadata file on disk.

    The number of samples per dataset and per label is printed along the way.

    Raises:
        IOError: If the base data folder or the dataset definition folder
            does not exist.
        ValueError: If two datasets share samples, or if a sample has no
            image or no metadata file.
    """
    base_dir = paths.BasePaths.DATA_DIR
    if not os.path.exists(base_dir):
        raise IOError('Base data folder {} does not exist'.format(base_dir))

    definitions_dir = paths.BasePaths.DATASET_DEF_DIR
    if not os.path.exists(definitions_dir):
        raise IOError('Dataset definition folder {} '
                      'does not exist'.format(definitions_dir))

    # Sample ids of the datasets validated so far, keyed by dataset name.
    seen_splits = {}

    for split_name, filename in constants.DatasetFilenames.items():
        print('Dataset {}'.format(split_name))
        definition_path = os.path.join(definitions_dir, filename)
        if not os.path.exists(definition_path):
            # A missing definition file is reported but is not an error.
            print('  No dataset {} at path {}'.format(split_name,
                                                      definition_path))
            continue

        with open(definition_path, 'r') as definition_file:
            split_ids = set(json.load(definition_file))

        print('  {} samples'.format(len(split_ids)))

        # A sample may belong to at most one dataset.
        for other_name, other_ids in seen_splits.items():
            shared = split_ids.intersection(other_ids)
            if shared:
                raise ValueError('{} samples are both in {} '
                                 'and {}'.format(len(shared), other_name,
                                                 split_name))
        seen_splits[split_name] = split_ids

        # Group the sample ids of this dataset by label.
        samples_by_label = {}
        for sample_id in split_ids:
            image_path = fi.get_image_path(base_dir, sample_id)
            if not os.path.exists(image_path):
                raise ValueError('No image for sample {} '
                                 'at {}'.format(sample_id, image_path))

            meta_path = fi.get_metadata_path(base_dir, sample_id)
            if not os.path.exists(meta_path):
                raise ValueError('No metadata file for sample {} '
                                 'at {}'.format(sample_id, meta_path))

            with open(meta_path, 'r') as meta_file:
                meta = json.load(meta_file)

            label = fi.MetadataReader.get_label(meta)
            if label not in samples_by_label:
                samples_by_label[label] = set()
            samples_by_label[label].add(sample_id)

        for label, members in samples_by_label.items():
            print('    Label {}: {} samples'.format(label, len(members)))
    def setUp(self):
        """Build a fake file system with labelled sample metadata files.

        ``self._class_samples`` maps each label to the ids of its samples.
        For every sample, a JSON metadata file holding its id and label is
        created under the base folder ``self._data_dir``.
        """
        self.setUpPyfakefs()
        self._data_dir = '/my/base/dir'
        self._class_samples = {
            0: {'foo', 'bar'},
            1: {'baz'},
        }

        for class_index in self._class_samples:
            for name in self._class_samples[class_index]:
                record = {
                    constants.MetadataFields.ID: name,
                    constants.MetadataFields.LABEL: class_index,
                }
                target = fi.get_metadata_path(self._data_dir, name)
                # The metadata is stored as a JSON document.
                self.fs.create_file(target, contents=json.dumps(record))
Exemple #4
0
def _images_and_labels_from_dataset_definition(dataset_definition):
    """Collect the image path and the label of every sample of a dataset.

    Args:
        dataset_definition (iterable): Identifiers of the samples.

    Returns:
        tuple: A list of image paths and a list of labels, both following
            the iteration order of ``dataset_definition``.
    """
    base_dir = paths.BasePaths.DATA_DIR

    image_paths, labels = [], []
    for sample_id in dataset_definition:
        # Only the path is recorded here; the image itself is not opened.
        image_paths.append(fi.get_image_path(base_dir, sample_id))

        # The label is read from the sample's JSON metadata file.
        with open(fi.get_metadata_path(base_dir, sample_id), 'r') as meta_file:
            sample_meta = json.load(meta_file)
        labels.append(fi.MetadataReader.get_label(sample_meta))

    return image_paths, labels
def _save_sample_metadata(data_dir, sample_id, label, dry=False):
    """Write the metadata file of a sample as JSON.

    The file holds the sample id and its label. Its parent folder is
    created through ``paths.mkdir_if_not_exists`` before writing.

    Args:
        data_dir (str): Path to the base data folder.
        sample_id (str): Identifies the sample.
        label (int): Class index of the sample.
        dry (bool, optional): If True, nothing is written to the file
            system. Defaults to False.
    """
    target_path = fi.get_metadata_path(data_dir, sample_id)
    target_dir = os.path.dirname(target_path)
    record = {
        MetadataFields.ID: sample_id,
        MetadataFields.LABEL: label,
    }

    # Dry runs stop before anything touches the disk.
    if dry:
        return

    paths.mkdir_if_not_exists(target_dir)
    with open(target_path, 'w') as out_file:
        json.dump(record, out_file, indent=4)
Exemple #6
0
 def test_success(self):
     """The metadata path is built from the base folder and sample id."""
     actual = fi.get_metadata_path(
         data_dir=self._BASE_DIR,
         sample_id=self._SAMPLE_ID,
     )
     self.assertEqual(actual, '/my/base/dir/8a/33/foo.json')