Exemple #1
0
 def convert_to_json(self, input_data, output_dir, is_dir=True):
     """Export annotations as a single ``result.json`` inside *output_dir*.

     When *is_dir* is true, every ``*.json`` file directly inside
     *input_data* is parsed and the parsed documents are written out as one
     JSON array.  Otherwise *input_data* is treated as a single file and is
     copied verbatim to ``result.json`` with ``copy2``.
     """
     self._check_format(Format.JSON)
     ensure_dir(output_dir)
     destination = os.path.join(output_dir, 'result.json')

     if not is_dir:
         # Single-file input: copied as-is, nothing is parsed.
         copy2(input_data, destination)
         return

     merged = []
     for source_path in glob(os.path.join(input_data, '*.json')):
         with io.open(source_path, encoding='utf8') as source:
             merged.append(json.load(source))

     with io.open(destination, mode='w', encoding='utf8') as sink:
         json.dump(merged, sink, indent=2, ensure_ascii=False)
Exemple #2
0
    def convert_to_json_min(self, input_data, output_dir, is_dir=True):
        """Write a flattened ``result.json`` into *output_dir*.

        Each exported record is a deep copy of the item's ``'input'`` mapping
        to which every entry of the item's ``'output'`` mapping is added under
        the same name, after being passed through ``self._prettify``.

        Items come from ``self.iter_from_dir`` when *is_dir* is true and from
        ``self.iter_from_json_file`` otherwise.
        """
        self._check_format(Format.JSON_MIN)
        ensure_dir(output_dir)
        destination = os.path.join(output_dir, 'result.json')

        if is_dir:
            read_items = self.iter_from_dir
        else:
            read_items = self.iter_from_json_file

        flattened = []
        for entry in read_items(input_data):
            row = deepcopy(entry['input'])
            for field, raw in entry['output'].items():
                row[field] = self._prettify(raw)
            flattened.append(row)

        with io.open(destination, mode='w', encoding='utf8') as sink:
            json.dump(flattened, sink, indent=2, ensure_ascii=False)
Exemple #3
0
    def convert_to_csv(self, input_data, output_dir, is_dir=True, **kwargs):
        """Export annotations as ``result.csv`` inside *output_dir*.

        One row is produced per item: a deep copy of the item's ``'input'``
        mapping, extended with one column per entry of its ``'output'``
        mapping.  Output values are passed through ``self._prettify``; values
        that are not already strings are serialized to JSON text so they fit
        in a single CSV cell.

        :param input_data: directory (``is_dir=True``) or JSON file to read.
        :param output_dir: directory that receives ``result.csv``.
        :param is_dir: selects ``self.iter_from_dir`` or
            ``self.iter_from_json_file`` as the item source.
        :param kwargs: forwarded unchanged to ``DataFrame.to_csv``.
        """
        self._check_format(Format.CSV)
        ensure_dir(output_dir)
        output_file = os.path.join(output_dir, 'result.csv')
        item_iterator = self.iter_from_dir if is_dir else self.iter_from_json_file

        records = []
        for item in item_iterator(input_data):
            record = deepcopy(item['input'])
            for name, value in item['output'].items():
                pretty_value = self._prettify(value)
                if not isinstance(pretty_value, str):
                    # ensure_ascii=False keeps non-ASCII text readable in the
                    # cell instead of turning it into \uXXXX escapes.
                    pretty_value = json.dumps(pretty_value, ensure_ascii=False)
                record[name] = pretty_value
            records.append(record)

        pd.DataFrame.from_records(records).to_csv(output_file, index=False, **kwargs)
    def convert_to_conll2003(self, input_data, output_dir, is_dir=True):
        """Write ``result.conll`` into *output_dir*.

        The file starts with a ``-DOCSTART-`` line.  For every item, the text
        stored under the first data key and the first value of the item's
        ``'output'`` mapping (``None`` when there is none) are handed to
        ``create_tokens_and_tags``; each resulting token/tag pair becomes one
        line, and every item is terminated by an empty line.
        """
        self._check_format(Format.CONLL2003)
        ensure_dir(output_dir)
        conll_path = os.path.join(output_dir, 'result.conll')
        text_key = self._data_keys[0]
        with io.open(conll_path, 'w', encoding='utf8') as sink:
            sink.write('-DOCSTART- -X- O\n')
            if is_dir:
                read_items = self.iter_from_dir
            else:
                read_items = self.iter_from_json_file

            for entry in read_items(input_data):
                text = entry['input'][text_key]
                spans = next(iter(entry['output'].values()), None)
                tokens, tags = create_tokens_and_tags(text=text, spans=spans)
                sink.writelines(
                    '{token} -X- _ {tag}\n'.format(token=token, tag=tag)
                    for token, tag in zip(tokens, tags)
                )
                sink.write('\n')
    def convert_to_csv(self, input_data, output_dir, is_dir=True, **kwargs):
        """Export annotations, with annotation metadata, as ``result.csv``.

        Every row starts as a deep copy of the item's ``'input'`` mapping and
        then receives, in this order: the item ``id`` (when not ``None``), one
        column per ``'output'`` entry (prettified; non-string values are
        JSON-encoded), the annotator returned by ``_get_annotator``, the
        ``annotation_id``/``created_at``/``updated_at``/``lead_time`` fields,
        and ``agreement`` when the item carries it.

        Extra keyword arguments are forwarded to ``DataFrame.to_csv``.
        """
        self._check_format(Format.CSV)
        ensure_dir(output_dir)
        csv_path = os.path.join(output_dir, 'result.csv')
        if is_dir:
            read_items = self.iter_from_dir
        else:
            read_items = self.iter_from_json_file

        rows = []
        for entry in read_items(input_data):
            row = deepcopy(entry['input'])
            if entry.get('id') is not None:
                row['id'] = entry['id']
            for field, raw in entry['output'].items():
                cell = self._prettify(raw)
                if not isinstance(cell, str):
                    cell = json.dumps(cell, ensure_ascii=False)
                row[field] = cell
            row['annotator'] = _get_annotator(entry)
            # These metadata fields are mandatory: a missing one raises KeyError.
            for meta_key in ('annotation_id', 'created_at', 'updated_at', 'lead_time'):
                row[meta_key] = entry[meta_key]
            if 'agreement' in entry:
                row['agreement'] = entry['agreement']
            rows.append(row)

        frame = pd.DataFrame.from_records(rows)
        frame.to_csv(csv_path, index=False, **kwargs)
    def convert_to_json_min(self, input_data, output_dir, is_dir=True):
        """Write a flattened ``result.json`` with annotation metadata.

        Every record starts as a deep copy of the item's ``'input'`` mapping
        and then receives, in this order: the item ``id`` (when not ``None``),
        one key per ``'output'`` entry (prettified), the annotator returned by
        ``_get_annotator(item, int_id=True)``, the
        ``annotation_id``/``created_at``/``updated_at``/``lead_time`` fields,
        and ``agreement`` when the item carries it.
        """
        self._check_format(Format.JSON_MIN)
        ensure_dir(output_dir)
        destination = os.path.join(output_dir, 'result.json')
        if is_dir:
            read_items = self.iter_from_dir
        else:
            read_items = self.iter_from_json_file

        flattened = []
        for entry in read_items(input_data):
            row = deepcopy(entry['input'])
            if entry.get('id') is not None:
                row['id'] = entry['id']
            for field, raw in entry['output'].items():
                row[field] = self._prettify(raw)
            row['annotator'] = _get_annotator(entry, int_id=True)
            # These metadata fields are mandatory: a missing one raises KeyError.
            for meta_key in ('annotation_id', 'created_at', 'updated_at', 'lead_time'):
                row[meta_key] = entry[meta_key]
            if 'agreement' in entry:
                row['agreement'] = entry['agreement']
            flattened.append(row)

        with io.open(destination, mode='w', encoding='utf8') as sink:
            json.dump(flattened, sink, indent=2, ensure_ascii=False)
    def convert_to_voc(self, input_data, output_dir, output_image_dir=None, is_dir=True):
        """Export rectangle annotations as one XML file per image.

        XML files are written to ``<output_dir>/Annotations/<image name>.xml``.
        Images that do not exist locally are fetched with ``download`` into
        *output_image_dir* (default: ``<output_dir>/images``).

        :param input_data: directory (``is_dir=True``) or JSON file to read.
        :param output_dir: root directory of the export.
        :param output_image_dir: optional directory for downloaded images.
        :param is_dir: selects ``self.iter_from_dir`` or
            ``self.iter_from_json_file`` as the item source.

        Items without output, or whose first output value is empty, are
        logged and skipped.  A failed download is logged but the item is
        still exported, using the default channel count.
        """
        ensure_dir(output_dir)
        if output_image_dir is not None:
            ensure_dir(output_image_dir)
            output_image_dir_rel = output_image_dir
        else:
            output_image_dir = os.path.join(output_dir, 'images')
            os.makedirs(output_image_dir, exist_ok=True)
            output_image_dir_rel = 'images'

        def create_child_node(doc, tag, attr, parent_node):
            """Append ``<tag>attr</tag>`` to *parent_node*."""
            child_node = doc.createElement(tag)
            text_node = doc.createTextNode(attr)
            child_node.appendChild(text_node)
            parent_node.appendChild(child_node)

        # Used for <depth> whenever the real channel count cannot be read.
        default_channels = 3

        data_key = self._data_keys[0]
        annotations_dir = os.path.join(output_dir, 'Annotations')
        item_iterator = self.iter_from_dir(input_data) if is_dir else self.iter_from_json_file(input_data)
        for item_idx, item in enumerate(item_iterator):
            if not item['output']:
                logger.warning('No completions found for item #' + str(item_idx))
                continue
            image_path = item['input'][data_key]
            os.makedirs(annotations_dir, exist_ok=True)

            # Both values are reset for every item so that nothing leaks over
            # from a previously processed item.
            image_folder = output_image_dir_rel
            channels = default_channels
            if os.path.exists(image_path):
                # The image is already local: read its channel count directly.
                # Previously `channels` was left unbound (or stale) here.
                try:
                    _, _, channels = get_image_size_and_channels(image_path)
                except Exception:
                    logger.warning('Unable to read channels of ' + str(image_path), exc_info=True)
                    channels = default_channels
            else:
                try:
                    image_path, is_downloaded = download(image_path, output_image_dir, input_dir=input_data)
                    if not is_downloaded:
                        image_folder = os.path.dirname(image_path)
                except Exception:
                    logger.error('Unable to download {image_path}. The item {item} will be skipped'.format(
                        image_path=image_path, item=item), exc_info=True)
                    # On error, keep the default number of channels
                else:
                    # retrieve number of channels from downloaded image
                    _, _, channels = get_image_size_and_channels(image_path)

            bboxes = next(iter(item['output'].values()))
            if len(bboxes) == 0:
                logger.error('Empty bboxes.')
                continue

            width, height = bboxes[0]['original_width'], bboxes[0]['original_height']

            image_name = os.path.splitext(os.path.basename(image_path))[0]
            xml_filepath = os.path.join(annotations_dir, image_name + '.xml')

            my_dom = xml.dom.getDOMImplementation()
            doc = my_dom.createDocument(None, 'annotation', None)
            root_node = doc.documentElement
            create_child_node(doc, 'folder', image_folder, root_node)
            create_child_node(doc, 'filename', image_name, root_node)

            source_node = doc.createElement('source')
            create_child_node(doc, 'database', 'MyDatabase', source_node)
            create_child_node(doc, 'annotation', 'COCO2017', source_node)
            create_child_node(doc, 'image', 'flickr', source_node)
            create_child_node(doc, 'flickrid', 'NULL', source_node)
            root_node.appendChild(source_node)
            owner_node = doc.createElement('owner')
            create_child_node(doc, 'flickrid', 'NULL', owner_node)
            create_child_node(doc, 'name', 'Label Studio', owner_node)
            root_node.appendChild(owner_node)
            size_node = doc.createElement('size')
            create_child_node(doc, 'width', str(width), size_node)
            create_child_node(doc, 'height', str(height), size_node)
            create_child_node(doc, 'depth', str(channels), size_node)
            root_node.appendChild(size_node)
            create_child_node(doc, 'segmented', '0', root_node)

            for bbox in bboxes:
                name = bbox['rectanglelabels'][0]
                # Coordinates are stored as percentages of the original image size.
                x = int(bbox['x'] / 100 * width)
                y = int(bbox['y'] / 100 * height)
                w = int(bbox['width'] / 100 * width)
                h = int(bbox['height'] / 100 * height)

                object_node = doc.createElement('object')
                create_child_node(doc, 'name', name, object_node)
                create_child_node(doc, 'pose', 'Unspecified', object_node)
                create_child_node(doc, 'truncated', '0', object_node)
                create_child_node(doc, 'difficult', '0', object_node)
                bndbox_node = doc.createElement('bndbox')
                create_child_node(doc, 'xmin', str(x), bndbox_node)
                create_child_node(doc, 'ymin', str(y), bndbox_node)
                create_child_node(doc, 'xmax', str(x + w), bndbox_node)
                create_child_node(doc, 'ymax', str(y + h), bndbox_node)
                object_node.appendChild(bndbox_node)
                root_node.appendChild(object_node)

            with io.open(xml_filepath, mode='w', encoding='utf8') as fout:
                # NOTE(review): this used to read `addindent='' * 4`, which is
                # the empty string, so elements have never been indented.
                # Kept as-is to leave the emitted XML unchanged.
                doc.writexml(fout, addindent='', newl='\n', encoding='utf-8')
    def convert_to_coco(self, input_data, output_dir, output_image_dir=None, is_dir=True):
        """Export rectangle and polygon annotations as ``result.json``.

        The written document has ``images``, ``categories``, ``annotations``
        and ``info`` sections.  Image, category and annotation ids are
        assigned sequentially starting from 0 in order of first appearance.

        :param input_data: directory (``is_dir=True``) or JSON file to read.
        :param output_dir: directory that receives ``result.json``.
        :param output_image_dir: optional directory for downloaded images
            (default: ``<output_dir>/images``).
        :param is_dir: selects ``self.iter_from_dir`` or
            ``self.iter_from_json_file`` as the item source.
        :raises ValueError: when a label has neither ``rectanglelabels`` nor
            ``polygonlabels``.
        """
        self._check_format(Format.COCO)
        ensure_dir(output_dir)
        output_file = os.path.join(output_dir, 'result.json')
        if output_image_dir is not None:
            ensure_dir(output_image_dir)
            output_image_dir_rel = output_image_dir
        else:
            output_image_dir = os.path.join(output_dir, 'images')
            os.makedirs(output_image_dir, exist_ok=True)
            output_image_dir_rel = 'images'
        images, categories, annotations = [], [], []
        category_name_to_id = {}
        data_key = self._data_keys[0]
        item_iterator = self.iter_from_dir(input_data) if is_dir else self.iter_from_json_file(input_data)
        for item_idx, item in enumerate(item_iterator):
            if not item['output']:
                logger.warning('No completions found for item #' + str(item_idx))
                continue
            image_path = item['input'][data_key]
            if not os.path.exists(image_path):
                try:
                    image_path, is_downloaded = download(image_path, output_image_dir, input_dir=input_data)
                    if is_downloaded:
                        image_path = os.path.join(output_image_dir_rel, os.path.basename(image_path))
                except Exception:
                    # Best effort: the item is still exported with its
                    # original path.  `Exception` (not a bare except) lets
                    # KeyboardInterrupt/SystemExit propagate.
                    logger.error('Unable to download {image_path}. The item {item} will be skipped'.format(
                        image_path=image_path, item=item
                    ), exc_info=True)
            labels = next(iter(item['output'].values()))
            if len(labels) == 0:
                logger.error('Empty bboxes.')
                continue
            width, height = labels[0]['original_width'], labels[0]['original_height']
            image_id = len(images)
            images.append({
                'width': width,
                'height': height,
                'id': image_id,
                'file_name': image_path
            })

            for label in labels:
                is_rectangle = 'rectanglelabels' in label
                if is_rectangle:
                    category_name = label['rectanglelabels'][0]
                elif 'polygonlabels' in label:
                    category_name = label['polygonlabels'][0]
                else:
                    raise ValueError("Unknown label type")

                if category_name not in category_name_to_id:
                    category_id = len(categories)
                    category_name_to_id[category_name] = category_id
                    categories.append({
                        'id': category_id,
                        'name': category_name
                    })
                category_id = category_name_to_id[category_name]

                annotation_id = len(annotations)

                if is_rectangle:
                    # Coordinates are stored as percentages of the image size.
                    x = int(label['x'] / 100 * width)
                    y = int(label['y'] / 100 * height)
                    w = int(label['width'] / 100 * width)
                    h = int(label['height'] / 100 * height)

                    annotations.append({
                        'id': annotation_id,
                        'image_id': image_id,
                        'category_id': category_id,
                        'segmentation': [],
                        'bbox': [x, y, w, h],
                        'ignore': 0,
                        'iscrowd': 0,
                        'area': w * h
                    })
                else:
                    # Only polygon labels can reach this point: any other
                    # label type already raised above.
                    points_abs = [(px / 100 * width, py / 100 * height) for px, py in label["points"]]
                    xs, ys = zip(*points_abs)

                    annotations.append({
                        'id': annotation_id,
                        'image_id': image_id,
                        'category_id': category_id,
                        'segmentation': [[coord for point in points_abs for coord in point]],
                        'bbox': get_polygon_bounding_box(xs, ys),
                        'ignore': 0,
                        'iscrowd': 0,
                        'area': get_polygon_area(xs, ys)
                    })

        with io.open(output_file, mode='w', encoding='utf8') as fout:
            json.dump({
                'images': images,
                'categories': categories,
                'annotations': annotations,
                'info': {
                    'year': datetime.now().year,
                    'version': '1.0',
                    'contributor': 'Label Studio'
                }
            }, fout, indent=2)
    def convert_to_yolo(self,
                        input_data,
                        output_dir,
                        output_image_dir=None,
                        output_label_dir=None,
                        is_dir=True):
        """Export box annotations as per-image text files plus class metadata.

        For every item a ``<image name>.txt`` file is written to
        *output_label_dir* with one ``category_id x y w h`` row per label,
        where ``x``/``y`` are the box centre and all four values are
        fractions of the image size.  ``classes.txt`` and ``notes.json`` are
        written to *output_dir*.

        :param input_data: directory (``is_dir=True``) or JSON file to read.
        :param output_dir: root directory of the export.
        :param output_image_dir: optional directory for downloaded images
            (default: ``<output_dir>/images``).
        :param output_label_dir: optional directory for label files
            (default: ``<output_dir>/labels``).
        :param is_dir: selects ``self.iter_from_dir`` or
            ``self.iter_from_json_file`` as the item source.

        Labels carrying neither ``rectanglelabels`` nor ``labels`` are logged
        and skipped.
        """
        self._check_format(Format.YOLO)
        ensure_dir(output_dir)
        notes_file = os.path.join(output_dir, 'notes.json')
        class_file = os.path.join(output_dir, 'classes.txt')
        if output_image_dir is not None:
            ensure_dir(output_image_dir)
        else:
            output_image_dir = os.path.join(output_dir, 'images')
            os.makedirs(output_image_dir, exist_ok=True)
        if output_label_dir is not None:
            ensure_dir(output_label_dir)
        else:
            output_label_dir = os.path.join(output_dir, 'labels')
            os.makedirs(output_label_dir, exist_ok=True)
        categories = []
        category_name_to_id = {}
        data_key = self._data_keys[0]
        item_iterator = self.iter_from_dir(
            input_data) if is_dir else self.iter_from_json_file(input_data)
        for item_idx, item in enumerate(item_iterator):
            if not item['output']:
                logger.warning('No completions found for item #' +
                               str(item_idx))
                continue
            image_path = item['input'][data_key]
            if not os.path.exists(image_path):
                try:
                    image_path = download(image_path,
                                          output_image_dir,
                                          project_dir=self.project_dir,
                                          return_relative_path=True,
                                          upload_dir=self.upload_dir)
                except Exception:
                    # Best effort: the item is still exported with its
                    # original path.  `Exception` (not a bare except) lets
                    # KeyboardInterrupt/SystemExit propagate.
                    logger.error(
                        'Unable to download {image_path}. The item {item} will be skipped'
                        .format(image_path=image_path, item=item),
                        exc_info=True)

            # concatenate results over all tag names
            labels = []
            for key in item['output']:
                labels += item['output'][key]

            if len(labels) == 0:
                logger.warning(f'Empty bboxes for {item["output"]}')
                continue

            label_path = os.path.join(
                output_label_dir,
                os.path.splitext(os.path.basename(image_path))[0] + '.txt')
            annotations = []
            for label in labels:
                if 'rectanglelabels' in label:
                    category_name = label['rectanglelabels'][0]
                elif 'labels' in label:
                    category_name = label['labels'][0]
                else:
                    logger.warning(f"Unknown label type {label}")
                    continue
                if category_name not in category_name_to_id:
                    category_id = len(categories)
                    category_name_to_id[category_name] = category_id
                    categories.append({
                        'id': category_id,
                        'name': category_name
                    })
                category_id = category_name_to_id[category_name]

                # Every label reaching this point has 'rectanglelabels' or
                # 'labels', so the former "unknown label type" raise was
                # unreachable and has been dropped.
                x = (label['x'] + label['width'] / 2) / 100
                y = (label['y'] + label['height'] / 2) / 100
                w = label['width'] / 100
                h = label['height'] / 100
                annotations.append([category_id, x, y, w, h])
            with open(label_path, 'w') as f:
                for annotation in annotations:
                    f.write(' '.join(str(value) for value in annotation) + '\n')
        with open(class_file, 'w', encoding='utf8') as f:
            for c in categories:
                f.write(c['name'] + '\n')
        with io.open(notes_file, mode='w', encoding='utf8') as fout:
            json.dump(
                {
                    'categories': categories,
                    'info': {
                        'year': datetime.now().year,
                        'version': '1.0',
                        'contributor': 'Label Studio'
                    }
                },
                fout,
                indent=2)
    def convert_to_coco(self,
                        input_data,
                        output_dir,
                        output_image_dir=None,
                        is_dir=True):
        """Export rectangle, polygon and generic ``labels`` results as ``result.json``.

        The written document has ``images``, ``categories``, ``annotations``
        and ``info`` sections.  Ids are assigned sequentially starting from 0
        in order of first appearance.  An image entry is created when the
        first label of a recognised type is seen for an item, using that
        label's ``original_width``/``original_height``.

        :param input_data: directory (``is_dir=True``) or JSON file to read.
        :param output_dir: directory that receives ``result.json``.
        :param output_image_dir: optional directory for downloaded images
            (default: ``<output_dir>/images``).
        :param is_dir: selects ``self.iter_from_dir`` or
            ``self.iter_from_json_file`` as the item source.

        Labels of an unrecognised type are logged and skipped.  When the
        ``LABEL_STUDIO_FORCE_ANNOTATOR_EXPORT`` environment variable is set,
        each annotation also gets an ``annotator`` field taken from
        ``item['completed_by'].get('email')``.
        """
        self._check_format(Format.COCO)
        ensure_dir(output_dir)
        output_file = os.path.join(output_dir, 'result.json')
        if output_image_dir is not None:
            ensure_dir(output_image_dir)
        else:
            output_image_dir = os.path.join(output_dir, 'images')
            os.makedirs(output_image_dir, exist_ok=True)
        images, categories, annotations = [], [], []
        category_name_to_id = {}
        data_key = self._data_keys[0]
        # Loop-invariant: read the environment once instead of once per label.
        export_annotator = os.getenv('LABEL_STUDIO_FORCE_ANNOTATOR_EXPORT')
        item_iterator = self.iter_from_dir(
            input_data) if is_dir else self.iter_from_json_file(input_data)
        for item_idx, item in enumerate(item_iterator):
            if not item['output']:
                logger.warning('No annotations found for item #' +
                               str(item_idx))
                continue
            image_path = item['input'][data_key]
            if not os.path.exists(image_path):
                try:
                    image_path = download(image_path,
                                          output_image_dir,
                                          project_dir=self.project_dir,
                                          return_relative_path=True,
                                          upload_dir=self.upload_dir)
                except Exception:
                    # Best effort: the item is still exported with its
                    # original path.  `Exception` (not a bare except) lets
                    # KeyboardInterrupt/SystemExit propagate.
                    logger.error(
                        'Unable to download {image_path}. The item {item} will be skipped'
                        .format(image_path=image_path, item=item),
                        exc_info=True)

            # concatenate results over all tag names
            labels = []
            for key in item['output']:
                labels += item['output'][key]

            if len(labels) == 0:
                logger.warning(f'Empty bboxes for {item["output"]}')
                continue

            first = True

            for label in labels:
                if 'rectanglelabels' in label:
                    category_name = label['rectanglelabels'][0]
                elif 'polygonlabels' in label:
                    category_name = label['polygonlabels'][0]
                elif 'labels' in label:
                    category_name = label['labels'][0]
                else:
                    logger.warning("Unknown label type: " + str(label))
                    continue

                # get image sizes
                if first:
                    width, height = label['original_width'], label[
                        'original_height']
                    image_id = len(images)
                    images.append({
                        'width': width,
                        'height': height,
                        'id': image_id,
                        'file_name': image_path
                    })
                    first = False

                if category_name not in category_name_to_id:
                    category_id = len(categories)
                    category_name_to_id[category_name] = category_id
                    categories.append({
                        'id': category_id,
                        'name': category_name
                    })
                category_id = category_name_to_id[category_name]

                annotation_id = len(annotations)

                if 'rectanglelabels' in label or 'labels' in label:
                    # Coordinates are stored as percentages of the image size.
                    x = int(label['x'] / 100 * width)
                    y = int(label['y'] / 100 * height)
                    w = int(label['width'] / 100 * width)
                    h = int(label['height'] / 100 * height)

                    annotations.append({
                        'id': annotation_id,
                        'image_id': image_id,
                        'category_id': category_id,
                        'segmentation': [],
                        'bbox': [x, y, w, h],
                        'ignore': 0,
                        'iscrowd': 0,
                        'area': w * h,
                    })
                else:
                    # Only polygon labels can reach this point: labels with
                    # none of the three known keys were skipped above, so the
                    # former trailing "unknown label type" raise was
                    # unreachable and has been dropped.
                    points_abs = [(px / 100 * width, py / 100 * height)
                                  for px, py in label["points"]]
                    xs, ys = zip(*points_abs)

                    annotations.append({
                        'id': annotation_id,
                        'image_id': image_id,
                        'category_id': category_id,
                        'segmentation': [[coord for point in points_abs for coord in point]],
                        'bbox': get_polygon_bounding_box(xs, ys),
                        'ignore': 0,
                        'iscrowd': 0,
                        'area': get_polygon_area(xs, ys)
                    })

                if export_annotator:
                    annotations[-1].update(
                        {'annotator': item['completed_by'].get('email')})

        with io.open(output_file, mode='w', encoding='utf8') as fout:
            json.dump(
                {
                    'images': images,
                    'categories': categories,
                    'annotations': annotations,
                    'info': {
                        'year': datetime.now().year,
                        'version': '1.0',
                        'contributor': 'Label Studio'
                    }
                },
                fout,
                indent=2)
    def convert_to_yolo(self, input_data, output_dir, output_image_dir=None, output_label_dir=None, is_dir=True):
        """Export box annotations as per-image text files plus class metadata.

        For every item a ``<image name>.txt`` file is written to
        *output_label_dir* with one ``category_id x y w h`` row per label,
        where ``x``/``y`` are the box centre and all four values are
        fractions of the image size.  A rotated box is replaced by the
        axis-aligned box enclosing its four corners, clipped to the image.
        ``classes.txt`` and ``notes.json`` are written to *output_dir*.

        The category list is seeded from ``self._get_labels()`` and extended
        with any further names found in the data.

        :param input_data: directory (``is_dir=True``) or JSON file to read.
        :param output_dir: root directory of the export.
        :param output_image_dir: optional directory for downloaded images
            (default: ``<output_dir>/images``).
        :param output_label_dir: optional directory for label files
            (default: ``<output_dir>/labels``).
        :param is_dir: selects ``self.iter_from_dir`` or
            ``self.iter_from_json_file`` as the item source.
        :raises ValueError: for a label that only carries ``polygonlabels``,
            which this exporter cannot express as a box.
        """
        self._check_format(Format.YOLO)
        ensure_dir(output_dir)
        notes_file = os.path.join(output_dir, 'notes.json')
        class_file = os.path.join(output_dir, 'classes.txt')
        if output_image_dir is not None:
            ensure_dir(output_image_dir)
        else:
            output_image_dir = os.path.join(output_dir, 'images')
            os.makedirs(output_image_dir, exist_ok=True)
        if output_label_dir is not None:
            ensure_dir(output_label_dir)
        else:
            output_label_dir = os.path.join(output_dir, 'labels')
            os.makedirs(output_label_dir, exist_ok=True)
        categories, category_name_to_id = self._get_labels()
        data_key = self._data_keys[0]
        item_iterator = self.iter_from_dir(input_data) if is_dir else self.iter_from_json_file(input_data)
        for item_idx, item in enumerate(item_iterator):
            if not item['output']:
                logger.warning('No completions found for item #' + str(item_idx))
                continue
            image_path = item['input'][data_key]
            if not os.path.exists(image_path):
                try:
                    image_path = download(image_path, output_image_dir, project_dir=self.project_dir,
                                          return_relative_path=True, upload_dir=self.upload_dir,
                                          download_resources=self.download_resources)
                except Exception:
                    # Best effort: the item is still exported with its
                    # original path.  `Exception` (not a bare except) lets
                    # KeyboardInterrupt/SystemExit propagate.
                    logger.error('Unable to download {image_path}. The item {item} will be skipped'.format(
                        image_path=image_path, item=item
                    ), exc_info=True)

            # concatenate results over all tag names
            labels = []
            for key in item['output']:
                labels += item['output'][key]

            if len(labels) == 0:
                logger.warning(f'Empty bboxes for {item["output"]}')
                continue

            label_path = os.path.join(output_label_dir, os.path.splitext(os.path.basename(image_path))[0]+'.txt')
            annotations = []
            for label in labels:

                category_name = None
                for key in ['rectanglelabels', 'polygonlabels', 'labels']:
                    if key in label and len(label[key]) > 0:
                        category_name = label[key][0]
                        break

                if category_name is None:
                    logger.warning("Unknown label type or labels are empty: " + str(label))
                    continue

                if category_name not in category_name_to_id:
                    category_id = len(categories)
                    category_name_to_id[category_name] = category_id
                    categories.append({
                        'id': category_id,
                        'name': category_name
                    })
                category_id = category_name_to_id[category_name]

                if "rectanglelabels" in label or 'labels' in label:
                    label_x = label["x"]
                    label_y = label["y"]
                    label_w = label["width"]
                    label_h = label["height"]
                    # A missing or null rotation is treated as "not rotated"
                    # instead of raising KeyError/TypeError.
                    label_r = label.get("rotation") or 0
                    if abs(label_r) > 0:
                        r = math.pi * label_r / 180
                        sin_r = math.sin(r)
                        cos_r = math.cos(r)
                        h_sin_r, h_cos_r = label_h * sin_r, label_h * cos_r
                        x_top_right = label_x + label_w * cos_r
                        y_top_right = label_y + label_w * sin_r

                        # Four corners of the box rotated about its (x, y) corner.
                        x_ls = [
                            label_x,
                            x_top_right,
                            x_top_right - h_sin_r,
                            label_x - h_sin_r,
                        ]
                        y_ls = [
                            label_y,
                            y_top_right,
                            y_top_right + h_cos_r,
                            label_y + h_cos_r,
                        ]
                        # Enclosing axis-aligned box, clipped to 0..100 percent.
                        label_x = max(0, min(x_ls))
                        label_y = max(0, min(y_ls))
                        label_w = min(100, max(x_ls)) - label_x
                        label_h = min(100, max(y_ls)) - label_y

                    x = (label_x + label_w / 2) / 100
                    y = (label_y + label_h / 2) / 100
                    w = label_w / 100
                    h = label_h / 100
                    annotations.append([category_id, x, y, w, h])
                else:
                    # Reached for labels that only carry 'polygonlabels'.
                    raise ValueError(f"Unknown label type {label}")
            with open(label_path, 'w') as f:
                for annotation in annotations:
                    f.write(' '.join(str(value) for value in annotation) + '\n')
        with open(class_file, 'w', encoding='utf8') as f:
            for c in categories:
                f.write(c['name']+'\n')
        with io.open(notes_file, mode='w', encoding='utf8') as fout:
            json.dump({
                'categories': categories,
                'info': {
                    'year': datetime.now().year,
                    'version': '1.0',
                    'contributor': 'Label Studio'
                }
            }, fout, indent=2)
Exemple #12
0
    def convert_to_coco(self,
                        input_data,
                        output_dir,
                        output_image_dir=None,
                        is_dir=True,
                        save_no_object_image=False):
        """Export rectangle and polygon labels to a COCO-style ``result.json``.

        Label coordinates are divided by 100 and scaled by the image size, so
        x values are multiplied by the image width and y values by the image
        height. Image and annotation ids start at 1; category ids start at 0
        and are assigned in order of first appearance.

        Args:
            input_data: Read with ``iter_from_dir`` when ``is_dir`` is true,
                otherwise with ``iter_from_json_file``.
            output_dir: Directory that receives ``result.json``.
            output_image_dir: Directory passed to ``download`` for images that
                are not found locally. Defaults to ``<output_dir>/images``.
            is_dir: Selects which iterator is used for ``input_data``.
            save_no_object_image: When true, items with an empty output are
                still listed under ``images`` (without annotations).
        """
        self._check_format(Format.COCO)
        ensure_dir(output_dir)
        output_file = os.path.join(output_dir, 'result.json')
        if output_image_dir is not None:
            ensure_dir(output_image_dir)
        else:
            output_image_dir = os.path.join(output_dir, 'images')
            os.makedirs(output_image_dir, exist_ok=True)
        images, categories, annotations = [], [], []
        category_name_to_id = {
        }  # TODO: add custom category_name_to_id, ex {name1:0, name2:1}
        data_key = self._data_keys[0]
        item_iterator = self.iter_from_dir(
            input_data) if is_dir else self.iter_from_json_file(input_data)
        for item_idx, item in enumerate(item_iterator):
            # Decode url -> local path: "<name>?d=<root>" becomes
            # "<root>/<basename of name>". A value without the "?d=" marker is
            # kept untouched instead of failing on tuple unpacking.
            image_path = item['input'][data_key]
            image_name, marker, root = urllib.parse.unquote(
                image_path).partition('?d=')
            if marker:
                image_path = os.path.join(root, os.path.split(image_name)[-1])
            if not os.path.exists(image_path):
                try:
                    image_path = download(image_path, output_image_dir)
                except Exception:
                    # Best effort: skip this item, but let KeyboardInterrupt
                    # and SystemExit propagate.
                    logger.error(
                        'Unable to download %s. The item %s will be skipped',
                        image_path,
                        item,
                        exc_info=True)
                    continue

            # Items without any labelled object carry no annotations; they are
            # only listed as images on request.
            if not item['output']:
                logger.warning('No completions found for item #%s', item_idx)
                if save_no_object_image:
                    width, height = get_image_size(image_path)
                    images.append({
                        'width': width,
                        'height': height,
                        'id': len(images) + 1,
                        'file_name': image_path
                    })
                continue

            width, height = get_image_size(image_path)
            image_id = len(images) + 1
            images.append({
                'width': width,
                'height': height,
                'id': image_id,
                'file_name': image_path
            })
            objects = next(iter(item['output'].values()))
            for obj in objects:
                # Anything that is not a rectangle is handled as a polygon.
                is_rectangle = obj['type'] == 'RectangleLabels'
                labels_key = 'rectanglelabels' if is_rectangle else 'polygonlabels'
                category_name = obj[labels_key][0]
                if category_name not in category_name_to_id:
                    category_id = len(categories)
                    category_name_to_id[category_name] = category_id
                    categories.append({
                        'id': category_id,
                        'name': category_name
                    })
                category_id = category_name_to_id[category_name]
                # Same 1-based numbering for both shapes so ids stay unique.
                annotation_id = len(annotations) + 1

                if is_rectangle:
                    x = int(obj['x'] / 100 * width)
                    y = int(obj['y'] / 100 * height)
                    w = int(obj['width'] / 100 * width)
                    h = int(obj['height'] / 100 * height)
                    segmentation = []
                    bbox_list = [x, y, w, h]
                    area = w * h
                else:
                    # Flat [x0, y0, x1, y1, ...] list in pixels.
                    segmentation = []
                    for point_x, point_y in obj['points']:
                        segmentation.append(point_x / 100 * width)
                        segmentation.append(point_y / 100 * height)
                    bbox_list, area = self.findWidthHeight(segmentation)

                annotations.append({
                    'id': annotation_id,
                    'image_id': image_id,
                    'category_id': category_id,
                    'segmentation': segmentation,
                    'bbox': bbox_list,
                    'ignore': 0,
                    'iscrowd': 0,
                    'area': area
                })

        with io.open(output_file, mode='w') as fout:
            json.dump(
                {
                    'images': images,
                    'categories': categories,
                    'annotations': annotations,
                    'info': {
                        'year': datetime.now().year,
                        'version': '1.0',
                        'contributor': 'Label Studio'
                    }
                },
                fout,
                indent=2)
Exemple #13
0
    def convert_to_coco(self, input_data, output_dir, output_image_dir=None, is_dir=True):
        """Export rectangle labels to a COCO-style ``result.json``.

        Box coordinates are divided by 100, scaled by the image size and
        truncated to integers. Image, category and annotation ids are all
        0-based and assigned in order of appearance.

        Args:
            input_data: Read with ``iter_from_dir`` when ``is_dir`` is true,
                otherwise with ``iter_from_json_file``.
            output_dir: Directory that receives ``result.json``.
            output_image_dir: Directory passed to ``download`` for images that
                are not found locally. Defaults to ``<output_dir>/images``.
            is_dir: Selects which iterator is used for ``input_data``.
        """
        self._check_format(Format.COCO)
        ensure_dir(output_dir)
        output_file = os.path.join(output_dir, 'result.json')
        if output_image_dir is not None:
            ensure_dir(output_image_dir)
            output_image_dir_rel = output_image_dir
        else:
            output_image_dir = os.path.join(output_dir, 'images')
            os.makedirs(output_image_dir, exist_ok=True)
            # Recorded file names are relative to output_dir in this case.
            output_image_dir_rel = 'images'
        images, categories, annotations = [], [], []
        category_name_to_id = {}
        data_key = self._data_keys[0]
        item_iterator = self.iter_from_dir(input_data) if is_dir else self.iter_from_json_file(input_data)
        for item_idx, item in enumerate(item_iterator):
            if not item['output']:
                logger.warning('No completions found for item #%s', item_idx)
                continue
            image_path = item['input'][data_key]
            if not os.path.exists(image_path):
                try:
                    image_path = download(image_path, output_image_dir)
                except Exception:
                    # Best effort: skip this item, but let KeyboardInterrupt
                    # and SystemExit propagate.
                    logger.error(
                        'Unable to download %s. The item %s will be skipped',
                        image_path, item, exc_info=True)
                    continue
            width, height = get_image_size(image_path)
            image_id = len(images)
            images.append({
                'width': width,
                'height': height,
                'id': image_id,
                'file_name': os.path.join(output_image_dir_rel, os.path.basename(image_path))
            })
            # Only the first output entry of the item is exported.
            bboxes = next(iter(item['output'].values()))
            for bbox in bboxes:
                category_name = bbox['rectanglelabels'][0]
                if category_name not in category_name_to_id:
                    category_id = len(categories)
                    category_name_to_id[category_name] = category_id
                    categories.append({
                        'id': category_id,
                        'name': category_name
                    })
                category_id = category_name_to_id[category_name]
                annotation_id = len(annotations)
                x = int(bbox['x'] / 100 * width)
                y = int(bbox['y'] / 100 * height)
                w = int(bbox['width'] / 100 * width)
                h = int(bbox['height'] / 100 * height)
                annotations.append({
                    'id': annotation_id,
                    'image_id': image_id,
                    'category_id': category_id,
                    'segmentation': [],
                    'bbox': [x, y, w, h],
                    'ignore': 0,
                    'iscrowd': 0,
                    'area': w * h
                })

        with io.open(output_file, mode='w') as fout:
            json.dump({
                'images': images,
                'categories': categories,
                'annotations': annotations,
                'info': {
                    'year': datetime.now().year,
                    'version': '1.0',
                    'contributor': 'Label Studio'
                }
            }, fout, indent=2)