def __init__(self, url):
    """Load a cached dataset from a local project directory.

    Reads `config.json` (merged over DEFAULT_CONFIG and validated against
    CONFIG_SCHEMA) and `images_meta.json`, then builds an id-ordered map of
    dataset items with lazy image loaders.
    """
    super().__init__()

    self._local_dir = url
    self._cache_dir = osp.join(url, 'images')

    with open(osp.join(url, 'config.json'), 'r') as config_file:
        raw_config = json.load(config_file)
    self._config = Config(raw_config,
        fallback=DEFAULT_CONFIG, schema=CONFIG_SCHEMA)

    with open(osp.join(url, 'images_meta.json'), 'r') as images_file:
        images_meta = json.load(images_file)

    loaded = []
    for entry in images_meta['images']:
        frame_id = entry['id']
        loaded.append((frame_id, datumaro.DatasetItem(
            id=frame_id, image=self._make_image_loader(frame_id))))
    # keep items ordered by id for deterministic iteration
    self._items = OrderedDict(sorted(loaded, key=lambda e: e[0]))

    self._cvat_cli = None
    self._session = None
def __init__(self, task_data, include_images=False):
    """Build dataset items from CVAT task data, one item per frame.

    When include_images is True, each item's image carries a lazy loader
    that fetches the original-quality frame as a numpy array.
    """
    super().__init__()
    self._categories = self._load_categories(task_data)

    frame_provider = None
    if include_images:
        frame_provider = FrameProvider(task_data.db_task.data)

    items = []
    for frame_data in task_data.group_by_frame(include_empty=True):
        loader = None
        if frame_provider is not None:
            # bind the frame index as a default argument to avoid the
            # late-binding closure pitfall
            loader = lambda p, i=frame_data.idx: frame_provider.get_frame(i,
                quality=frame_provider.Quality.ORIGINAL,
                out_type=frame_provider.Type.NUMPY_ARRAY)[0]
        frame_image = Image(path=frame_data.name, loader=loader,
            size=(frame_data.height, frame_data.width))
        items.append(datumaro.DatasetItem(
            id=osp.splitext(frame_data.name)[0],
            annotations=self._read_cvat_anno(frame_data, task_data),
            image=frame_image,
            attributes={'frame': frame_data.frame},
        ))
    self._items = items
def get(self, item_id, subset=None, path=None):
    """Return a single dataset item by id.

    Subsets and paths are not supported by this extractor; requesting
    either raises KeyError.
    """
    if subset or path:
        raise KeyError()
    image_data = self._frame_provider[item_id].getvalue()
    return datumaro.DatasetItem(id=item_id, image=image_data)
def __iter__(self):
    """Yield one dataset item per frame, ids assigned by frame position."""
    provider = self._frame_provider
    frames = provider.get_frames(
        provider.Quality.ORIGINAL, provider.Type.NUMPY_ARRAY)
    for frame_id, frame in enumerate(frames):
        yield datumaro.DatasetItem(id=frame_id, image=Image(frame))
def __init__(self, task_data, include_images=False):
    """Convert CVAT task data into dataset items, one per frame.

    Video tasks get a synthetic frame extension and serve frames as numpy
    arrays; image tasks reuse the original encoded bytes to avoid
    recompression.
    """
    super().__init__()
    self._categories = self._load_categories(task_data)

    is_video = task_data.meta['task']['mode'] == 'interpolation'
    ext = FrameProvider.VIDEO_FRAME_EXT if is_video else ''

    if include_images:
        frame_provider = FrameProvider(task_data.db_task.data)
        if is_video:
            # optimization for videos: use numpy arrays instead of bytes
            # some formats or transforms can require image data
            def _make_image(i, **kwargs):
                loader = lambda _: frame_provider.get_frame(i,
                    quality=frame_provider.Quality.ORIGINAL,
                    out_type=frame_provider.Type.NUMPY_ARRAY)[0]
                return Image(loader=loader, **kwargs)
        else:
            # for images use encoded data to avoid recoding
            def _make_image(i, **kwargs):
                loader = lambda _: frame_provider.get_frame(i,
                    quality=frame_provider.Quality.ORIGINAL,
                    out_type=frame_provider.Type.BUFFER)[0].getvalue()
                return ByteImage(data=loader, **kwargs)

    items = []
    for frame_data in task_data.group_by_frame(include_empty=True):
        image_args = {
            'path': frame_data.name + ext,
            'size': (frame_data.height, frame_data.width),
        }
        if include_images:
            frame_image = _make_image(frame_data.idx, **image_args)
        else:
            frame_image = Image(**image_args)
        items.append(datumaro.DatasetItem(
            id=osp.splitext(frame_data.name)[0],
            annotations=self._read_cvat_anno(frame_data, task_data),
            image=frame_image,
            attributes={'frame': frame_data.frame},
        ))
    self._items = items
def __init__(self, url, cvat_annotations):
    """Build frame-indexed dataset items from pre-loaded CVAT annotations.

    Args:
        url: unused here; kept for a common extractor interface.
        cvat_annotations: container exposing group_by_frame() plus
            per-frame name/size metadata.
    """
    # Fix: base-class initialization was missing here, unlike every
    # sibling extractor constructor in this file.
    super().__init__()

    self._categories = self._load_categories(cvat_annotations)

    dm_annotations = []
    for cvat_frame_anno in cvat_annotations.group_by_frame():
        dm_anno = self._read_cvat_anno(cvat_frame_anno, cvat_annotations)
        dm_image = Image(path=cvat_frame_anno.name,
            size=(cvat_frame_anno.height, cvat_frame_anno.width))
        dm_item = datumaro.DatasetItem(id=cvat_frame_anno.frame,
            annotations=dm_anno, image=dm_image)
        dm_annotations.append((dm_item.id, dm_item))

    # sort numerically by frame id to keep deterministic iteration order
    dm_annotations = sorted(dm_annotations, key=lambda e: int(e[0]))
    self._items = OrderedDict(dm_annotations)
def __init__(self, url):
    """Index every image file found under `url` as a dataset item.

    Item ids are the frame numbers recovered from the image paths.
    """
    super().__init__()

    collected = []
    for dirpath, _, filenames in os.walk(url):
        for filename in filenames:
            image_path = osp.join(dirpath, filename)
            if not self._is_image(image_path):
                continue
            frame_id = Task.get_image_frame(image_path)
            collected.append((frame_id, datumaro.DatasetItem(
                id=frame_id, image=image_path)))

    # order numerically by frame id
    collected.sort(key=lambda e: int(e[0]))
    self._items = OrderedDict(collected)
    self._subsets = None
def __init__(self, url, db_task, user):
    """Load task annotations from the database into dataset items.

    Args:
        url: unused here; kept for a common extractor interface.
        db_task: the task model whose annotations are exported.
        user: user on whose behalf the annotation data is read.
    """
    # Fix: base-class initialization was missing here, unlike the sibling
    # extractor constructors in this file.
    super().__init__()

    self._db_task = db_task
    self._categories = self._load_categories()

    cvat_annotations = TaskAnnotation(db_task.id, user)
    # read a consistent snapshot of the annotation tables
    with transaction.atomic():
        cvat_annotations.init_from_db()
    cvat_annotations = Annotation(cvat_annotations.ir_data, db_task)

    dm_annotations = []
    for cvat_anno in cvat_annotations.group_by_frame():
        dm_anno = self._read_cvat_anno(cvat_anno)
        dm_item = datumaro.DatasetItem(id=cvat_anno.frame,
            annotations=dm_anno)
        dm_annotations.append((dm_item.id, dm_item))

    dm_annotations = sorted(dm_annotations, key=lambda e: e[0])
    self._items = OrderedDict(dm_annotations)
    self._subsets = None
def put_annotations(self, annotations):
    """Convert serialized CVAT tags and shapes to datumaro annotations and
    merge them into the underlying dataset, grouped by frame.

    Args:
        annotations: dict with 'tags' and 'shapes' lists in CVAT wire
            format (each entry carries frame, label_id, group, attributes,
            and for shapes a type and points).

    Raises:
        Exception: on an unknown shape type.
    """
    patch = {}

    categories = self._dataset.categories()
    label_cat = categories[datumaro.AnnotationType.label]

    # map DB label/attribute ids to dataset label entries / attribute names
    label_map = {}
    attr_map = {}
    for db_label in self._db_task.label_set.all():
        label_map[db_label.id] = label_cat.find(db_label.name)
        for db_attr in db_label.attributespec_set.all():
            attr_map[(db_label.id, db_attr.id)] = db_attr.name
    map_label = lambda label_db_id: label_map[label_db_id]
    map_attr = lambda label_db_id, attr_db_id: \
        attr_map[(label_db_id, attr_db_id)]

    for tag_obj in annotations['tags']:
        item_id = str(tag_obj['frame'])
        item_anno = patch.get(item_id, [])

        anno_group = tag_obj['group']
        if isinstance(anno_group, int):
            anno_group = [anno_group]
        anno_label = map_label(tag_obj['label_id'])
        anno_attr = {}
        for attr in tag_obj['attributes']:
            attr_name = map_attr(tag_obj['label_id'], attr['id'])
            anno_attr[attr_name] = attr['value']

        anno = datumaro.LabelObject(label=anno_label,
            attributes=anno_attr, group=anno_group)
        item_anno.append(anno)

        patch[item_id] = item_anno

    for shape_obj in annotations['shapes']:
        item_id = str(shape_obj['frame'])
        item_anno = patch.get(item_id, [])

        anno_group = shape_obj['group']
        if isinstance(anno_group, int):
            anno_group = [anno_group]
        anno_label = map_label(shape_obj['label_id'])
        anno_attr = {}
        for attr in shape_obj['attributes']:
            attr_name = map_attr(shape_obj['label_id'], attr['id'])
            anno_attr[attr_name] = attr['value']

        anno_points = shape_obj['points']
        if shape_obj['type'] == ShapeType.POINTS:
            anno = datumaro.PointsObject(anno_points,
                label=anno_label, attributes=anno_attr, group=anno_group)
        elif shape_obj['type'] == ShapeType.POLYLINE:
            anno = datumaro.PolyLineObject(anno_points,
                label=anno_label, attributes=anno_attr, group=anno_group)
        elif shape_obj['type'] == ShapeType.POLYGON:
            anno = datumaro.PolygonObject(anno_points,
                label=anno_label, attributes=anno_attr, group=anno_group)
        elif shape_obj['type'] == ShapeType.RECTANGLE:
            # points are stored as two corners: (x0, y0, x1, y1)
            x0, y0, x1, y1 = anno_points
            anno = datumaro.BboxObject(x0, y0, x1 - x0, y1 - y0,
                label=anno_label, attributes=anno_attr, group=anno_group)
        else:
            raise Exception("Unknown shape type '%s'" % (shape_obj['type']))

        item_anno.append(anno)

        patch[item_id] = item_anno

    # TODO: support track annotations

    # Fix: the comprehension previously referenced the stale loop variable
    # 'anno' instead of the unpacked per-item list 'ann', so every item
    # received the last shape's single annotation object instead of its
    # own accumulated annotation list.
    patch = [datumaro.DatasetItem(id=id_, annotations=ann)
        for id_, ann in patch.items()]
    self._dataset.update(patch)