Example #1
# Assumed module-level dependencies (from the surrounding project): os,
# collections.defaultdict, fs_utils, image_utils, Annotation, Rectangle,
# Project, save_images_lists, and the OUT_IMG_EXT / XML_EXT constants.
def save_project_as_pascal_voc_detection(save_path, project: Project):
    # Imported lazily: pascal_voc_writer is only needed for this export.
    import pascal_voc_writer

    # Create root pascal 'datasets' folders
    for dataset in project.datasets:
        pascal_dataset_path = os.path.join(save_path, dataset.name)

        images_dir = os.path.join(pascal_dataset_path, 'JPEGImages')
        anns_dir = os.path.join(pascal_dataset_path, 'Annotations')
        lists_dir = os.path.join(pascal_dataset_path, 'ImageSets/Layout')

        fs_utils.mkdir(pascal_dataset_path)
        for subdir in ['ImageSets',  # Train list, Val list, etc.
                       'ImageSets/Layout',
                       'Annotations',
                       'JPEGImages']:
            fs_utils.mkdir(os.path.join(pascal_dataset_path, subdir))

        samples_by_tags = defaultdict(list)  # tag name -> [(image name, labels count), ...]

        for item_name in dataset:
            img_path, ann_path = dataset.get_item_paths(item_name)
            no_ext_name = fs_utils.get_file_name(item_name)
            pascal_img_path = os.path.join(images_dir, no_ext_name + OUT_IMG_EXT)
            pascal_ann_path = os.path.join(anns_dir, no_ext_name + XML_EXT)

            # Copy the image as-is if it already has the target extension,
            # otherwise re-encode it.
            if item_name.endswith(OUT_IMG_EXT):
                fs_utils.copy_file(img_path, pascal_img_path)
            else:
                img = image_utils.read(img_path)
                image_utils.write(pascal_img_path, img)

            ann = Annotation.load_json_file(ann_path, project_meta=project.meta)

            # Read tags for images lists generation
            for tag in ann.img_tags:
                samples_by_tags[tag.name].append((no_ext_name, len(ann.labels)))

            writer = pascal_voc_writer.Writer(path=pascal_img_path,
                                              width=ann.img_size[1],
                                              height=ann.img_size[0])

            for label in ann.labels:
                obj_class = label.obj_class
                rect: Rectangle = label.geometry.to_bbox()
                writer.addObject(name=obj_class.name,
                                 xmin=rect.left,
                                 ymin=rect.top,
                                 xmax=rect.right,
                                 ymax=rect.bottom)
            writer.save(pascal_ann_path)

        save_images_lists(lists_dir, samples_by_tags)
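
# Usage sketch for the exporter above; the project is opened the same way as
# in the later examples (paths are illustrative):
#
#     project = Project("/data/my_sly_project", OpenMode.READ)
#     save_project_as_pascal_voc_detection("/data/pascal_voc_export", project)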
Example #2
    def __init__(self,
                 logger=None,
                 task_id=None,
                 server_address=None,
                 agent_token=None,
                 ignore_errors=False,
                 ignore_task_id=False):
        self._ignore_task_id = ignore_task_id
        self.logger = take_with_default(logger, default_logger)
        self._ignore_errors = ignore_errors
        self.task_id = take_with_default(task_id, int(os.environ["TASK_ID"]))
        self.server_address = take_with_default(server_address,
                                                os.environ[SERVER_ADDRESS])
        self.agent_token = take_with_default(agent_token,
                                             os.environ[AGENT_TOKEN])
        self.public_api = Api.from_env(ignore_task_id=self._ignore_task_id)
        self._app_url = self.public_api.app.get_url(self.task_id)
        self._session_dir = "/sessions/{}".format(self.task_id)
        self._template_path = None
        # Local debugging: DEBUG_APP_DIR, when set, overrides the session dir.
        debug_app_dir = os.environ.get("DEBUG_APP_DIR", "")
        if debug_app_dir != "":
            self._session_dir = debug_app_dir
        mkdir(self.data_dir)

        self.cache_dir = "/apps_cache"
        # Local debugging: DEBUG_CACHE_DIR, when set, overrides the cache dir.
        debug_cache_dir = os.environ.get("DEBUG_CACHE_DIR", "")
        if debug_cache_dir != "":
            self.cache_dir = debug_cache_dir
        mkdir(self.cache_dir)
        self.cache = FileCache(name="FileCache", storage_root=self.cache_dir)

        self.api = AgentAPI(token=self.agent_token,
                            server_address=self.server_address,
                            ext_logger=self.logger)
        self.api.add_to_metadata('x-task-id', str(self.task_id))

        self.callbacks = {}
        self.periodic_items = {}

        self.processing_queue = queue.Queue()  # (maxsize=self.QUEUE_MAX_SIZE)
        self.logger.debug('App is created',
                          extra={
                              "task_id": self.task_id,
                              "server_address": self.server_address
                          })

        self._ignore_stop_for_debug = False
        self._error = None
        self.stop_event = asyncio.Event()
        self.has_ui = False
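
# Hedged construction sketch: with no arguments, the constructor falls back
# to the TASK_ID, SERVER_ADDRESS and AGENT_TOKEN environment variables (the
# class name AppService is assumed from context):
#
#     app = AppService(ignore_errors=True)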
Example #3
def get_data_dir():
    key = "SLY_APP_DATA_DIR"
    try:
        data_dir = os.environ[key]
    except KeyError:
        raise KeyError(f"Environment variable {key} is not defined")

    if not dir_exists(data_dir):
        logger.warn(
            f"App data directory {data_dir} doesn't exist. It will be created automatically."
        )
        mkdir(data_dir)
    return data_dir
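
# Usage sketch: SLY_APP_DATA_DIR must be set in the environment, e.g. for
# local debugging (path is illustrative):
#
#     os.environ["SLY_APP_DATA_DIR"] = "/tmp/my_app_data"
#     data_dir = get_data_dir()
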
def validate_data(api: sly.Api, task_id, context, state, app_logger):
    # g.api.app.set_field(g.task_id, "state.isValidating", True)
    report.clear()
    final_tags.clear()
    final_tags2images.clear()

    if state["trainData"] == "objects":
        mkdir(artifacts_example_img_dir, True)

    report.append({
        "type": "info",
        "title": "Total tags in project",
        "count": len(g.project_meta.tag_metas),
        "description": None
    })

    report.append({
        "title": "Tags unavailable for training",
        "count": len(tags.disabled_tags),
        "type": "warning",
        "description": "See previous step for more info"
    })

    selected_tags = tags.selected_tags  # state["selectedTags"]
    report.append({
        "title": "Selected tags for training",
        "count": len(selected_tags),
        "type": "info",
        "description": None
    })

    report.append({
        "title": "Total images in project",
        "count": g.project_info.items_count,
        "type": "info",
        "description": None
    })

    report.append({
        "title": "Images without tags",
        "count": len(tags.images_without_tags),
        "type": "warning" if len(tags.images_without_tags) > 0 else "pass",
        "description": "Such images don't have any tags, so they will be "
                       "ignored and will not be used for training."
    })

    # Count images that carry at least one selected tag (per split).
    num_images_before_validation = 0
    for tag_name in selected_tags:
        for split, infos in tags.tag2images[tag_name].items():
            num_images_before_validation += len(infos)
    report.append({
        "title": "Images with training tags",
        "count": num_images_before_validation,
        "type": "error" if num_images_before_validation == 0 else "pass",
        "description": "Images that have one of the selected tags assigned (before validation)"
    })

    # An image "collides" if more than one selected tag points to it
    # within the same split.
    collisions = defaultdict(lambda: defaultdict(int))
    for tag_name in selected_tags:
        for split, infos in tags.tag2images[tag_name].items():
            for info in infos:
                collisions[split][info.id] += 1
    num_collision_images = 0
    for split, split_collisions in collisions.items():
        for image_id, counter in split_collisions.items():
            if counter > 1:
                num_collision_images += 1
    report.append({
        "title": "Images with tags collisions",
        "count": num_collision_images,
        "type": "warning" if num_collision_images > 0 else "pass",
        "description": "Images with more than one training tag assigned; they will be "
                       "removed from train/val sets. Use the app 'Tags Co-Occurrence "
                       "Matrix' to discover such images"
    })

    # remove collision images from sets
    final_images_count = 0
    final_train_size = 0
    final_val_size = 0
    for tag_name in selected_tags:
        for split, infos in tags.tag2images[tag_name].items():
            _final_infos = []
            for info in infos:
                if collisions[split][info.id] == 1:
                    _final_infos.append(info)
                    final_images_count += 1
                    if split == "train":
                        final_train_size += 1
                    else:
                        final_val_size += 1
            if len(_final_infos) > 0:
                final_tags2images[tag_name][split].extend(_final_infos)
        if tag_name in final_tags2images and len(final_tags2images[tag_name]["train"]) > 0:
            final_tags.append(tag_name)

    tags_examples = defaultdict(list)
    for tag_name, infos in final_tags2images.items():
        for info in (infos['train'] + infos['val'])[:tags._max_examples_count]:
            if state["trainData"] == "objects":
                info = upload_img_example_to_files(api, info)
                tags_examples[tag_name].append(
                    g.api.image.preview_url(info.storage_path,
                                            height=tags._preview_height))
            else:
                tags_examples[tag_name].append(
                    g.api.image.preview_url(info.path_original,
                                            height=tags._preview_height))
    sly.json.dump_json_file(tags_examples,
                            os.path.join(g.info_dir, "tag2urls.json"))

    report.append({
        "title": "Final images count",
        "count": final_images_count,
        "type": "error" if final_images_count == 0 else "pass",
        "description": "Number of images (train + val) after collisions removal"
    })
    report.append({
        "title": "Train set size",
        "count": final_train_size,
        "type": "error" if final_train_size == 0 else "pass",
        "description": "Size of training set after collisions removal"
    })
    report.append({
        "title": "Val set size",
        "count": final_val_size,
        "type": "error" if final_val_size == 0 else "pass",
        "description": "Size of validation set after collisions removal"
    })

    final_tags_type = "pass"
    if len(final_tags) < 2 or len(final_tags) != len(selected_tags):
        final_tags_type = "error"
    report.append({
        "title": "Final training tags",
        "count": len(final_tags),
        "type": final_tags_type,
        "description":
            f"If this number differs from the number of selected tags, it means that after data "
            f"validation and cleaning some of the selected tags "
            f"{list(set(selected_tags) - set(final_tags))} "
            f"have 0 examples in the train set. Please restart step 3 and deselect these tags manually"
    })

    cnt_errors = 0
    cnt_warnings = 0
    for item in report:
        if item["type"] == "error":
            cnt_errors += 1
        elif item["type"] == "warning":
            cnt_warnings += 1

    fields = [
        {
            "field": "data.report",
            "payload": report
        },
        {
            "field": "data.done4",
            "payload": True
        },
        {
            "field": "data.cntErrors",
            "payload": cnt_errors
        },
        {
            "field": "data.cntWarnings",
            "payload": cnt_warnings
        },
    ]
    if cnt_errors == 0:
        # save selected tags
        gt_labels = {tag_name: idx for idx, tag_name in enumerate(final_tags)}
        sly.json.dump_json_file(gt_labels,
                                os.path.join(g.project_dir, "gt_labels.json"))
        sly.json.dump_json_file(gt_labels,
                                os.path.join(g.info_dir, "gt_labels.json"))

        # save splits
        # final_tags2images[tag_name][split].extend(_final_infos)
        split_paths = defaultdict(list)
        for tag_name, splits in final_tags2images.items():
            for split_name, infos in splits.items():
                if state["trainData"] == "images":
                    paths = [
                        input_project.get_paths_by_image_id(info.id)
                        for info in infos
                    ]
                else:
                    paths = [
                        input_project_objects.get_paths_by_image_id(info.id)
                        for info in infos
                    ]
                split_paths[split_name].extend(paths)
        sly.json.dump_json_file(split_paths,
                                os.path.join(g.project_dir, "splits.json"))

        fields.extend([
            {
                "field": "state.collapsed5",
                "payload": False
            },
            {
                "field": "state.disabled5",
                "payload": False
            },
            {
                "field": "state.activeStep",
                "payload": 5
            },
            {
                "field": "state.isValidating",
                "payload": False
            },
        ])
    g.api.app.set_fields(g.task_id, fields)
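
# Hedged note: single fields can be patched the same way as the batched
# set_fields(...) call above (this mirrors the commented-out call at the top
# of validate_data):
#
#     g.api.app.set_field(g.task_id, "state.isValidating", True)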
Example #5
def download_project_objects(api: sly.Api, task_id, context, state,
                             app_logger):
    try:
        if not dir_exists(g.project_dir):
            mkdir(g.project_dir)
            project_meta_path = os.path.join(g.project_dir, "meta.json")
            g.project_meta = convert_object_tags(g.project_meta)
            project_meta_json = g.project_meta.to_json()
            dump_json_file(project_meta_json, project_meta_path)
            datasets = api.dataset.get_list(g.project_id)
            for dataset in datasets:
                ds_dir = os.path.join(g.project_dir, dataset.name)
                img_dir = os.path.join(ds_dir, "img")
                ann_dir = os.path.join(ds_dir, "ann")

                mkdir(ds_dir)
                mkdir(img_dir)
                mkdir(ann_dir)
                images_infos = api.image.get_list(dataset.id)
                download_progress = get_progress_cb(
                    progress_index, "Download project",
                    g.project_info.items_count * 2)
                # The class selection doesn't change per batch; resolve it once.
                selected_classes = get_selected_classes_from_ui(
                    state["classesSelected"])
                for batch in sly.batched(images_infos):
                    image_ids = [image_info.id for image_info in batch]
                    image_names = [image_info.name for image_info in batch]
                    ann_infos = api.annotation.download_batch(
                        dataset.id, image_ids, progress_cb=download_progress)

                    image_nps = api.image.download_nps(
                        dataset.id, image_ids, progress_cb=download_progress)
                    anns = [
                        sly.Annotation.from_json(ann_info.annotation,
                                                 g.project_meta)
                        for ann_info in ann_infos
                    ]
                    crops = crop_and_resize_objects(image_nps, anns, state,
                                                    selected_classes,
                                                    image_names)
                    crop_nps, crop_anns, crop_names = unpack_crops(
                        crops, image_names)
                    crop_anns = copy_tags(crop_anns)
                    write_images(crop_nps, crop_names, img_dir)
                    dump_anns(crop_anns, crop_names, ann_dir)

            reset_progress(progress_index)

        global project_fs
        project_fs = sly.Project(g.project_dir, sly.OpenMode.READ)
        g.images_infos = create_img_infos(project_fs)
    except Exception:
        reset_progress(progress_index)
        raise

    items_count = g.project_stats["objects"]["total"]["objectsInDataset"]
    train_percent = 80
    train_count = int(items_count / 100 * train_percent)
    random_split = {
        "count": {
            "total": items_count,
            "train": train_count,
            "val": items_count - train_count
        },
        "percent": {
            "total": 100,
            "train": train_percent,
            "val": 100 - train_percent
        },
        "shareImagesBetweenSplits": False,
        "sliderDisabled": False,
    }

    fields = [
        {
            "field": "data.done1",
            "payload": True
        },
        {
            "field": "state.collapsed2",
            "payload": False
        },
        {
            "field": "state.disabled2",
            "payload": False
        },
        {
            "field": "state.activeStep",
            "payload": 2
        },
        {
            "field": "data.totalImagesCount",
            "payload": items_count
        },
        {
            "field": "state.randomSplit",
            "payload": random_split
        },
    ]
    g.api.app.set_fields(g.task_id, fields)
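
# Hedged registration sketch: handlers with this signature are typically
# bound to UI actions via the app's callback decorator (the app instance
# name g.my_app is assumed):
#
#     @g.my_app.callback("download_project_objects")
#     def download_project_objects(api: sly.Api, task_id, context, state,
#                                  app_logger):
#         ...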
Example #6
def create_img_infos(project_fs):
    tag_id_map = {
        tag["name"]: tag["id"]
        for tag in project_fs.meta.tag_metas.to_json()
    }
    images_infos = []
    for dataset_fs in project_fs:
        img_info_dir = os.path.join(dataset_fs.directory, "img_info")
        mkdir(img_info_dir)
        for idx, item_name in enumerate(os.listdir(dataset_fs.item_dir)):
            item_ext = get_file_ext(item_name).lstrip(".")
            item_path = os.path.join(dataset_fs.item_dir, item_name)
            item = sly.image.read(item_path)
            h, w = item.shape[:2]
            # Stat the file once instead of three separate filesystem calls.
            item_stat = os.stat(item_path)
            item_size = item_stat.st_size
            created_at = datetime.fromtimestamp(
                item_stat.st_ctime,
                tz=timezone.utc).strftime("%d-%m-%Y %H:%M:%S")
            modified_at = datetime.fromtimestamp(
                item_stat.st_mtime,
                tz=timezone.utc).strftime("%d-%m-%Y %H:%M:%S")

            item_ann_path = os.path.join(dataset_fs.ann_dir,
                                         f"{item_name}.json")
            ann_json = load_json_file(item_ann_path)
            ann = sly.Annotation.from_json(ann_json, project_fs.meta)
            tags = ann.img_tags
            tags_json = tags.to_json()
            labels_count = len(ann.labels)

            tags_img_info = []
            for tag in tags_json:
                tag_info = {
                    "entityId": None,
                    "tagId": tag_id_map[tag["name"]],
                    "id": None,
                    "labelerLogin": tag["labelerLogin"],
                    "createdAt": tag["createdAt"],
                    "updatedAt": tag["updatedAt"],
                    "name": tag["name"]
                }
                tags_img_info.append(tag_info)

            # Mimics the server-side image info structure; server-only fields
            # (link, hash, storage URLs) are left empty for a local project.
            item_img_info = {
                "id": idx,
                "name": item_name,
                "link": "",
                "hash": "",
                "mime": f"image/{item_ext}",
                "ext": item_ext,
                "size": item_size,
                "width": w,
                "height": h,
                "labels_count": labels_count,
                "dataset_id": dataset_fs.name,
                "created_at": created_at,
                "updated_at": modified_at,
                "meta": {},
                "path_original": "",
                "full_storage_url": "",
                "tags": tags_img_info
            }
            save_path = os.path.join(img_info_dir, f"{item_name}.json")
            dump_json_file(item_img_info, save_path)
            images_infos.append(item_img_info)
    return images_infos
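
# Usage sketch, mirroring the call in download_project_objects above:
#
#     project_fs = sly.Project(g.project_dir, sly.OpenMode.READ)
#     images_infos = create_img_infos(project_fs)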
Example #7
0
    def _create(self):
        # Create the dataset's item directory on disk.
        mkdir(self.item_dir)
Example #8
    def _prepare_next_dir(self):
        # Advance to the next zero-padded output subdirectory,
        # e.g. 00000001, 00000002, ...
        self._idx += 1
        self._subdir = '{:08}'.format(self._idx)
        self._odir = os.path.join(self._base_out_dir, self._subdir)
        sly_fs.mkdir(self._odir)
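
# Quick check of the naming scheme used above:
#
#     >>> '{:08}'.format(3)
#     '00000003'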