Code example #1
    def get_classes(self):
        trainable_labels_file = os.path.join(
            self._data_dir, CLASSES_TRAINABLE)
        trainable_labels = set()
        try:
            with tf.gfile.Open(trainable_labels_file) as tl:
                reader = csv.reader(tl)
                # Skip header
                next(reader, None)
                for line in reader:
                    trainable_labels.add(line[2])
        except tf.errors.NotFoundError:
            raise InvalidDataDirectory(
                'Missing label file "{}" from data_dir'.format(
                    CLASSES_TRAINABLE))
        self.trainable_labels = self._filter_classes(trainable_labels)

        labels_descriptions_file = os.path.join(
            self._data_dir, CLASSES_DESC)
        desc_by_label = {}
        try:
            with tf.gfile.Open(labels_descriptions_file) as ld:
                reader = csv.reader(ld)
                for line in reader:
                    if line[0] in self.trainable_labels:
                        desc_by_label[line[0]] = line[1]
        except tf.errors.NotFoundError:
            raise InvalidDataDirectory(
                'Missing label description file "{}" from data_dir'.format(
                    CLASSES_DESC))

        return [
            desc for _, desc in
            sorted(desc_by_label.items(), key=lambda x: x[0])
        ]
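A quick note on what example #1 returns: the descriptions come back sorted by label id, not by description. A minimal sketch of the final list comprehension (the label ids and descriptions here are hypothetical):

    desc_by_label = {'/m/0c9ph5': 'Flower', '/m/01g317': 'Person'}
    classes = [
        desc for _, desc in
        sorted(desc_by_label.items(), key=lambda x: x[0])
    ]
    # classes == ['Person', 'Flower']  (ordered by label id)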
Code example #2
    def get_classes(self):
        trainable_labels_file = self._get_classes_path()
        trainable_labels = set()
        try:
            with tf.gfile.Open(trainable_labels_file) as tl:
                reader = csv.reader(tl)
                # Skip header
                next(reader, None)
                for line in reader:
                    trainable_labels.add(line[2])
        except tf.errors.NotFoundError:
            raise InvalidDataDirectory(
                'The label file "{}" must be in the root data '
                'directory: {}'.format(
                    os.path.split(trainable_labels_file)[1], self._data_dir))

        self.trainable_labels = self._filter_classes(trainable_labels)

        # Build the map from class to description for pretty-printing
        # class names.
        labels_descriptions_file = os.path.join(self._data_dir, CLASSES_DESC)
        try:
            with tf.gfile.Open(labels_descriptions_file) as ld:
                reader = csv.reader(ld)
                for line in reader:
                    if line[0] in self.trainable_labels:
                        self.desc_by_label[line[0]] = line[1]
        except tf.errors.NotFoundError:
            raise InvalidDataDirectory(
                'Missing label description file "{}" from root data '
                'directory: {}'.format(CLASSES_DESC, self._data_dir))

        return self.trainable_labels
Code example #3
    def classes(self):
        if self._classes is None:
            trainable_labels_file = os.path.join(self._data_dir,
                                                 CLASSES_TRAINABLE)
            trainable_labels = set()
            try:
                with tf.gfile.Open(trainable_labels_file) as tl:
                    for label in tl:
                        trainable_labels.add(label.strip())
            except tf.errors.NotFoundError:
                raise InvalidDataDirectory(
                    'Missing label file "{}" from data_dir'.format(
                        CLASSES_TRAINABLE))

            labels_descriptions_file = os.path.join(self._data_dir,
                                                    CLASSES_DESC)
            desc_by_label = {}
            try:
                with tf.gfile.Open(labels_descriptions_file) as ld:
                    reader = csv.reader(ld)
                    for line in reader:
                        if line[0] in trainable_labels:
                            desc_by_label[line[0]] = line[1]
            except tf.errors.NotFoundError:
                raise InvalidDataDirectory(
                    'Missing label description file "{}" from data_dir'.format(
                        CLASSES_DESC))

            self._classes = sorted(trainable_labels)
            self._descriptions = [
                desc for _, desc in sorted(desc_by_label.items(),
                                           key=lambda x: x[0])
            ]

        return self._classes
Code example #4
File: csv_reader.py (project: dshea89/luminoth)
    def __init__(self, data_dir, split, headers=True, columns=None, **kwargs):
        """Initializes the reader, allowing to override internal settings.

        Arguments:
            data_dir: Path to base directory where all the files are
                located. See class docstring for a description on the expected
                structure.
            split: Split to read. Possible values depend on the dataset itself.
            headers (boolean): Whether the CSV file has a header row naming
                the fields; if so, those names are used.
            columns (list or str): Column names for when `headers` is `False`
                (i.e. the CSV file has no headers). Will be ignored if
                `headers` is `True`.
        """
        super(CSVReader, self).__init__(**kwargs)

        self._data_dir = data_dir
        self._split = split

        self._annotations_path = os.path.join(
            self._data_dir, '{}.csv'.format(self._split)
        )
        if not tf.gfile.Exists(self._annotations_path):
            raise InvalidDataDirectory(
                'CSV annotation file not found. Should be located at '
                '`{}`'.format(self._annotations_path)
            )

        self._images_dir = os.path.join(self._data_dir, self._split)
        if not tf.gfile.Exists(self._images_dir):
            raise InvalidDataDirectory(
                'Image directory not found. Should be located at '
                '`{}`'.format(self._images_dir)
            )

        if columns is not None:
            if is_basestring(columns):
                columns = columns.split(',')
        else:
            columns = self.DEFAULT_COLUMNS
        self._columns = columns
        self._column_names = set(self._columns)

        self._has_headers = headers

        # Cache for the records.
        # TODO: Don't read it all upfront.
        self._records = None

        # Whether the structure of the CSV file has been checked already.
        self._csv_checked = False

        self.errors = 0
        self.yielded_records = 0
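A hedged usage sketch for this constructor: it only validates that the split's CSV and the image directory exist, so instantiation is the natural place to catch InvalidDataDirectory. The path below is hypothetical:

    try:
        reader = CSVReader('/data/my-dataset', 'train')
    except InvalidDataDirectory as e:
        # Raised when /data/my-dataset/train.csv or the image directory
        # /data/my-dataset/train/ does not exist.
        print(e)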
Code example #5
    def _validate_structure(self):
        if not tf.gfile.Exists(self._data_dir):
            raise InvalidDataDirectory('"{}" does not exist.'.format(
                self._data_dir))

        if not tf.gfile.Exists(self._labels_path):
            raise InvalidDataDirectory(
                'Labels path "{}" is missing'.format(self._labels_path))

        if not tf.gfile.Exists(self._images_path):
            raise InvalidDataDirectory(
                'Images path "{}" is missing'.format(self._images_path))

        if not tf.gfile.Exists(self._annots_path):
            raise InvalidDataDirectory(
                'Annotations path "{}" is missing'.format(self._annots_path))
Code example #6
    def _get_labels_filename(self):
        """
        Get the label file.
        """
        root_labels = os.path.join(self._data_dir,
                                   '{}.csv'.format(self._split))

        if tf.gfile.Exists(root_labels):
            return root_labels

        split_labels_generic = os.path.join(self._data_dir, self._split,
                                            'labels.csv')

        if tf.gfile.Exists(split_labels_generic):
            return split_labels_generic

        split_labels_redundant = os.path.join(self._data_dir, self._split,
                                              '{}.csv'.format(self._split))

        if tf.gfile.Exists(split_labels_redundant):
            return split_labels_redundant

        raise InvalidDataDirectory(
            'Could not find labels for "{}" in "{}"'.format(
                self._split, self._data_dir))
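Restating the lookup order for a hypothetical split 'train' (taken directly from the probes above; data_dir stands for the reader's self._data_dir):

    candidates = [
        os.path.join(data_dir, 'train.csv'),
        os.path.join(data_dir, 'train', 'labels.csv'),
        os.path.join(data_dir, 'train', 'train.csv'),
    ]
    # The first existing path wins; InvalidDataDirectory is raised if
    # none exist.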
Code example #7
File: coco.py (project: zhearing/luminoth)
    def __init__(self,
                 data_dir,
                 split,
                 year=DEFAULT_YEAR,
                 use_supercategory=False,
                 **kwargs):
        super(COCOReader, self).__init__(**kwargs)
        self._data_dir = data_dir
        self._split = split
        self._year = year

        try:
            if self._split == 'train':
                tf.logging.debug('Loading annotation json (may take a while).')

            annotations_json = json.load(
                tf.gfile.Open(self._get_annotations_path()))
        except tf.errors.NotFoundError:
            raise InvalidDataDirectory(
                'Could not find COCO annotations in path')

        self._total_records = len(annotations_json['images'])

        category_to_name = {
            c['id']: (c['supercategory'] if use_supercategory else c['name'])
            for c in annotations_json['categories']
        }

        self._total_classes = sorted(set(category_to_name.values()))

        self._image_to_bboxes = {}
        for annotation in annotations_json['annotations']:
            image_id = annotation['image_id']
            x, y, width, height = annotation['bbox']
            self._image_to_bboxes.setdefault(image_id, []).append({
                'xmin': x,
                'ymin': y,
                'xmax': x + width,
                'ymax': y + height,
                'label': self.classes.index(
                    category_to_name[annotation['category_id']]),
            })

        self._image_to_details = {}
        for image in annotations_json['images']:
            self._image_to_details[image['id']] = {
                'file_name': image['file_name'],
                'width': image['width'],
                'height': image['height'],
            }

        del annotations_json

        self.yielded_records = 0
        self.errors = 0
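COCO stores boxes as [x, y, width, height]; the loop above converts them to corner coordinates. A standalone check of the arithmetic with made-up numbers:

    x, y, width, height = 10.0, 20.0, 100.0, 50.0
    xmin, ymin, xmax, ymax = x, y, x + width, y + height
    # (xmin, ymin, xmax, ymax) == (10.0, 20.0, 110.0, 70.0)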
Code example #8
File: flat_reader.py (project: czbiohub/luminoth)
    def annotated_files(self):
        if self._annotated_files is None:
            split_path = self._get_split_path()
            try:
                all_files = tf.gfile.ListDirectory(split_path)
            except tf.errors.NotFoundError:
                raise InvalidDataDirectory(
                    'Directory for split "{}" does not exist'.format(
                        self._split))

            self._annotated_files = []
            for filename in all_files:
                if filename.endswith(".{}".format(self._annotation_type)):
                    self._annotated_files.append(
                        filename[:-(len(self._annotation_type) + 1)])
            if len(self._annotated_files) == 0:
                raise InvalidDataDirectory(
                    "Could not find any annotations in {}".format(split_path))

        return self._annotated_files
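The extension stripping in this property can be verified in isolation; assuming annotation_type is 'xml' (the real value depends on the reader's configuration):

    annotation_type = 'xml'
    filename = 'image_001.xml'
    stem = filename[:-(len(annotation_type) + 1)]  # drops '.xml'
    # stem == 'image_001'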
Code example #9
File: csv_reader.py (project: czbiohub/luminoth)
    def _check_csv(self, row):
        """Checks whether the CSV has all the necessary columns.

        The actual check is done on the first row only, once the CSV has
        finally been opened and read.
        """
        if not self._csv_checked:
            missing_keys = self._column_names - set(row.keys())
            if missing_keys:
                raise InvalidDataDirectory(
                    "Columns missing from CSV: {}".format(missing_keys))
            self._csv_checked = True
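A hedged sketch of how _check_csv would typically be driven: a hypothetical iterator method (not part of the example above) that walks the annotations CSV with csv.DictReader, so each row is a dict keyed by column name. It assumes csv and tf are imported at module level:

    def _read_rows(self):
        # Hypothetical driver: validate the column layout on the first
        # row, then yield rows as dicts.
        with tf.gfile.Open(self._annotations_path) as f:
            for row in csv.DictReader(f):
                self._check_csv(row)
                yield row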
Code example #10
    def _normalize_csv_line(self, line_dict):
        line_dict = line_dict.copy()

        # Map known key names to known values.
        for old_key, new_key in self._field_mapper.items():
            if old_key in line_dict:
                line_dict[new_key] = line_dict.pop(old_key)

        # Remove invalid/unknown keys
        valid_keys = set(self._field_mapper.values())
        # Iterate over a copy of the keys: removing entries while iterating
        # a dict's live key view raises a RuntimeError in Python 3.
        for key in list(line_dict.keys()):
            if key not in valid_keys:
                line_dict.pop(key)

        if set(line_dict.keys()) != set(DEFAULT_COLUMNS):
            raise InvalidDataDirectory('Missing keys from CSV')

        return line_dict
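To make the normalization concrete, a sketch with a hypothetical field mapper (the real self._field_mapper and DEFAULT_COLUMNS are defined elsewhere in the reader):

    field_mapper = {
        'x_min': 'xmin', 'y_min': 'ymin', 'x_max': 'xmax', 'y_max': 'ymax',
        'class': 'label', 'image': 'image_id',
    }
    line = {'image': 'img-1.jpg', 'x_min': '10', 'y_min': '20',
            'x_max': '30', 'y_max': '40', 'class': 'car', 'extra': 'ignored'}
    # After renaming and dropping unknown keys, the line would contain
    # exactly the canonical columns:
    # {'image_id': 'img-1.jpg', 'xmin': '10', 'ymin': '20',
    #  'xmax': '30', 'ymax': '40', 'label': 'car'}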
Code example #11
File: taggerine.py (project: dshea89/luminoth)
    def _read_annotations(self, directory):
        """
        Finds and parses Taggerine's JSON files.
        """
        try:
            all_files = tf.gfile.ListDirectory(self._split_path)
        except tf.errors.NotFoundError:
            raise InvalidDataDirectory(
                'Directory for split "{}" does not exist'.format(
                    self._split))

        annotation_file_candidates = []
        for filename in all_files:
            if filename.lower().endswith('.json'):
                annotation_file_candidates.append(filename)

        if len(annotation_file_candidates) == 0:
            raise InvalidDataDirectory(
                'Could not find any annotations in {}. '.format(
                    self._split_path) +
                'Check that there is a .json file with Taggerine\'s '
                'annotations.')

        self.annotations = []
        # Open, validate and extract label information.
        for filename in annotation_file_candidates:
            annotation_path = os.path.join(self._split_path, filename)
            with tf.gfile.Open(annotation_path) as annotation_file:
                annotations = json.load(annotation_file)

            if not isinstance(annotations, dict):
                # JSON file with invalid format.
                continue

            file_annotations = []

            invalid_label = False
            for image_filename, labels in annotations.items():
                if not isinstance(labels, collections.abc.Iterable):
                    # Labels must be iterable; otherwise skip the whole file.
                    invalid_label = True
                    break

                # Validate labels
                for label in labels:
                    for valid_keyset in VALID_KEYS:
                        if all(key in label for key in valid_keyset):
                            break
                    else:
                        # There is no valid_keyset that can parse the label.
                        # Ignore all labels from this file.
                        invalid_label = True
                        break

                # Early stop for labels inside file when there is an invalid
                # label.
                if invalid_label:
                    break

                # Save annotations.
                file_annotations.append({
                    'image_id': os.path.basename(image_filename),
                    'filename': image_filename,
                    'path': os.path.join(self._split_path, image_filename),
                    'gt_boxes': labels,
                })

            if invalid_label:
                # Ignore files that have invalid labels.
                continue

            # If we have a valid file with data in it then we use it.
            self.annotations.extend(file_annotations)
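The label validation above relies on Python's for/else: the else branch runs only when no valid_keyset matched. The same test collapses to an any/all expression; VALID_KEYS here is hypothetical, the real constant lives at module level:

    VALID_KEYS = [
        ('xmin', 'ymin', 'xmax', 'ymax', 'label'),
        ('x', 'y', 'width', 'height', 'label'),
    ]
    label = {'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4, 'label': 'cat'}
    valid = any(
        all(key in label for key in keyset) for keyset in VALID_KEYS
    )
    # valid == True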
Code example #12
    def __init__(self,
                 data_dir,
                 split,
                 year=DEFAULT_YEAR,
                 use_supercategory=False,
                 **kwargs):
        super(COCOReader, self).__init__(**kwargs)
        self._data_dir = data_dir
        self._split = split
        self._year = year

        try:
            if self._split == "train":
                tf.logging.debug("Loading annotation json (may take a while).")

            annotations_json = json.load(
                tf.gfile.Open(self._get_annotations_path()))
        except tf.errors.NotFoundError:
            raise InvalidDataDirectory(
                "Could not find COCO annotations in path")

        self._total_records = len(annotations_json["images"])

        category_to_name = {
            c["id"]: (c["supercategory"] if use_supercategory else c["name"])
            for c in annotations_json["categories"]
        }

        self._total_classes = sorted(set(category_to_name.values()))

        self._image_to_bboxes = {}
        for annotation in annotations_json["annotations"]:
            image_id = annotation["image_id"]
            x, y, width, height = annotation["bbox"]

            # If the class is not in `classes`, it was filtered.
            try:
                annotation_class = self.classes.index(
                    category_to_name[annotation["category_id"]])
            except ValueError:
                continue

            self._image_to_bboxes.setdefault(image_id, []).append({
                "xmin": x,
                "ymin": y,
                "xmax": x + width,
                "ymax": y + height,
                "label": annotation_class,
            })

        self._image_to_details = {}
        for image in annotations_json["images"]:
            self._image_to_details[image["id"]] = {
                "file_name": image["file_name"],
                "width": image["width"],
                "height": image["height"],
            }

        del annotations_json

        self.yielded_records = 0
        self.errors = 0
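Finally, the minimal shape of the annotations JSON both COCO examples consume, listing only the fields the constructors actually read (real COCO files carry many more; the values here are made up):

    annotations_json = {
        'images': [
            {'id': 1, 'file_name': '000000000001.jpg',
             'width': 640, 'height': 480},
        ],
        'categories': [
            {'id': 18, 'name': 'dog', 'supercategory': 'animal'},
        ],
        'annotations': [
            {'image_id': 1, 'category_id': 18,
             'bbox': [10.0, 20.0, 100.0, 50.0]},
        ],
    }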