def save_project_as_pascal_voc_detection(save_path, project: Project):
    import pascal_voc_writer

    # Create root pascal 'datasets' folders
    for dataset in project.datasets:
        pascal_dataset_path = os.path.join(save_path, dataset.name)

        images_dir = os.path.join(pascal_dataset_path, 'JPEGImages')
        anns_dir = os.path.join(pascal_dataset_path, 'Annotations')
        lists_dir = os.path.join(pascal_dataset_path, 'ImageSets/Layout')

        fs_utils.mkdir(pascal_dataset_path)
        for subdir in ['ImageSets',  # Train list, Val list, etc.
                       'ImageSets/Layout',
                       'Annotations',
                       'JPEGImages']:
            fs_utils.mkdir(os.path.join(pascal_dataset_path, subdir))

        samples_by_tags = defaultdict(list)  # TRAIN: [img_1, img2, ..]

        for item_name in dataset:
            img_path, ann_path = dataset.get_item_paths(item_name)
            no_ext_name = fs_utils.get_file_name(item_name)
            pascal_img_path = os.path.join(images_dir, no_ext_name + OUT_IMG_EXT)
            pascal_ann_path = os.path.join(anns_dir, no_ext_name + XML_EXT)

            if item_name.endswith(OUT_IMG_EXT):
                fs_utils.copy_file(img_path, pascal_img_path)
            else:
                img = image_utils.read(img_path)
                image_utils.write(pascal_img_path, img)

            ann = Annotation.load_json_file(ann_path, project_meta=project.meta)

            # Read tags for images lists generation
            for tag in ann.img_tags:
                samples_by_tags[tag.name].append((no_ext_name, len(ann.labels)))

            writer = pascal_voc_writer.Writer(path=pascal_img_path,
                                              width=ann.img_size[1],
                                              height=ann.img_size[0])

            for label in ann.labels:
                obj_class = label.obj_class
                rect: Rectangle = label.geometry.to_bbox()
                writer.addObject(name=obj_class.name,
                                 xmin=rect.left,
                                 ymin=rect.top,
                                 xmax=rect.right,
                                 ymax=rect.bottom)
            writer.save(pascal_ann_path)

        save_images_lists(lists_dir, samples_by_tags)

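# Hypothetical usage sketch (not part of the original module): export a project that
# already exists on disk to Pascal VOC detection format. The source path, the export
# path, and the availability of `OpenMode` in this namespace are assumptions made
# purely for illustration.
def _example_export_to_pascal_voc():
    project = Project('/path/to/project', OpenMode.READ)  # placeholder project directory
    save_project_as_pascal_voc_detection('/path/to/pascal_voc_export', project)
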
def __init__(self, logger=None, task_id=None, server_address=None, agent_token=None,
             ignore_errors=False, ignore_task_id=False):
    self._ignore_task_id = ignore_task_id
    self.logger = take_with_default(logger, default_logger)
    self._ignore_errors = ignore_errors
    self.task_id = take_with_default(task_id, int(os.environ["TASK_ID"]))
    self.server_address = take_with_default(server_address, os.environ[SERVER_ADDRESS])
    self.agent_token = take_with_default(agent_token, os.environ[AGENT_TOKEN])
    self.public_api = Api.from_env(ignore_task_id=self._ignore_task_id)
    self._app_url = self.public_api.app.get_url(self.task_id)
    self._session_dir = "/sessions/{}".format(self.task_id)
    self._template_path = None

    debug_app_dir = os.environ.get("DEBUG_APP_DIR", "")
    if debug_app_dir != "":
        self._session_dir = debug_app_dir
    mkdir(self.data_dir)

    self.cache_dir = os.path.join("/apps_cache")
    debug_cache_dir = os.environ.get("DEBUG_CACHE_DIR", "")
    if debug_cache_dir != "":
        self.cache_dir = debug_cache_dir
    mkdir(self.cache_dir)
    self.cache = FileCache(name="FileCache", storage_root=self.cache_dir)

    self.api = AgentAPI(token=self.agent_token,
                        server_address=self.server_address,
                        ext_logger=self.logger)
    self.api.add_to_metadata('x-task-id', str(self.task_id))

    self.callbacks = {}
    self.periodic_items = {}

    self.processing_queue = queue.Queue()  # (maxsize=self.QUEUE_MAX_SIZE)
    self.logger.debug('App is created', extra={"task_id": self.task_id,
                                               "server_address": self.server_address})

    self._ignore_stop_for_debug = False
    self._error = None
    self.stop_event = asyncio.Event()

    self.has_ui = False

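# Hypothetical usage sketch (not part of the original source). The constructor above
# falls back to the TASK_ID, SERVER_ADDRESS and AGENT_TOKEN environment variables when
# the corresponding arguments are omitted; the owning class name `AppService` is an
# assumption made here only for illustration.
def _example_create_app_service():
    # Assumes the usual Supervisely agent environment (task id, server address, tokens)
    # is already exported, e.g. when running inside a deployed agent.
    app = AppService(ignore_errors=True)
    return app
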
def get_data_dir():
    key = "SLY_APP_DATA_DIR"
    dir = None
    try:
        dir = os.environ[key]
    except KeyError as e:
        raise KeyError(f"Environment variable {key} is not defined") from e
    if dir_exists(dir) is False:
        logger.warning(
            f"App data directory {dir} doesn't exist. It will be created automatically."
        )
        mkdir(dir)
    return dir

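# Hypothetical usage sketch (not part of the original source): get_data_dir() raises
# when SLY_APP_DATA_DIR is missing, so a caller may provide a fallback first.
# The fallback path below is a placeholder.
def _example_get_data_dir():
    os.environ.setdefault("SLY_APP_DATA_DIR", "/tmp/sly_app_data")
    return get_data_dir()  # creates the directory if it does not exist yet
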
def validate_data(api: sly.Api, task_id, context, state, app_logger):
    # g.api.app.set_field(g.task_id, "state.isValidating", True)
    report.clear()
    final_tags.clear()
    final_tags2images.clear()

    if state["trainData"] == "objects":
        mkdir(artifacts_example_img_dir, True)

    report.append({
        "type": "info",
        "title": "Total tags in project",
        "count": len(g.project_meta.tag_metas),
        "description": None
    })
    report.append({
        "title": "Tags unavailable for training",
        "count": len(tags.disabled_tags),
        "type": "warning",
        "description": "See previous step for more info"
    })

    selected_tags = tags.selected_tags  # state["selectedTags"]
    report.append({
        "title": "Selected tags for training",
        "count": len(selected_tags),
        "type": "info",
        "description": None
    })
    report.append({
        "type": "info",
        "title": "Total images in project",
        "count": g.project_info.items_count,
    })
    report.append({
        "title": "Images without tags",
        "count": len(tags.images_without_tags),
        "type": "warning" if len(tags.images_without_tags) > 0 else "pass",
        "description": "Such images don't have any tags, so they will be ignored "
                       "and will not be used for training."
    })

    num_images_before_validation = 0
    for tag_name in selected_tags:
        for split, infos in tags.tag2images[tag_name].items():
            num_images_before_validation += len(infos)
    report.append({
        "title": "Images with training tags",
        "count": num_images_before_validation,
        "type": "error" if num_images_before_validation == 0 else "pass",
        "description": "Images that have one of the selected tags assigned (before validation)"
    })

    collisions = defaultdict(lambda: defaultdict(int))
    for tag_name in selected_tags:
        for split, infos in tags.tag2images[tag_name].items():
            for info in infos:
                collisions[split][info.id] += 1
    num_collision_images = 0
    for split, split_collisions in collisions.items():
        for image_id, counter in split_collisions.items():
            if counter > 1:
                num_collision_images += 1
    report.append({
        "title": "Images with tags collisions",
        "count": num_collision_images,
        "type": "warning" if num_collision_images > 0 else "pass",
        "description": "Images with more than one training tag assigned; they will be removed "
                       "from train/val sets. Use the app 'Tags Co-Occurrence Matrix' to discover such images"
    })

    # remove collision images from sets
    final_images_count = 0
    final_train_size = 0
    final_val_size = 0
    for tag_name in selected_tags:
        for split, infos in tags.tag2images[tag_name].items():
            _final_infos = []
            for info in infos:
                if collisions[split][info.id] == 1:
                    _final_infos.append(info)
                    final_images_count += 1
                    if split == "train":
                        final_train_size += 1
                    else:
                        final_val_size += 1
            if len(_final_infos) > 0:
                final_tags2images[tag_name][split].extend(_final_infos)
        if tag_name in final_tags2images and len(final_tags2images[tag_name]["train"]) > 0:
            final_tags.append(tag_name)

    tags_examples = defaultdict(list)
    for tag_name, infos in final_tags2images.items():
        for info in (infos['train'] + infos['val'])[:tags._max_examples_count]:
            if state["trainData"] == "objects":
                info = upload_img_example_to_files(api, info)
                tags_examples[tag_name].append(
                    g.api.image.preview_url(info.storage_path, height=tags._preview_height))
            else:
                tags_examples[tag_name].append(
                    g.api.image.preview_url(info.path_original, height=tags._preview_height))
    sly.json.dump_json_file(tags_examples, os.path.join(g.info_dir, "tag2urls.json"))

    report.append({
        "title": "Final images count",
        "count": final_images_count,
        "type": "error" if final_images_count == 0 else "pass",
        "description": "Number of images (train + val) after collisions removal"
    })
    report.append({
        "title": "Train set size",
        "count": final_train_size,
        "type": "error" if final_train_size == 0 else "pass",
        "description": "Size of training set after collisions removal"
    })
    report.append({
        "title": "Val set size",
        "count": final_val_size,
        "type": "error" if final_val_size == 0 else "pass",
        "description": "Size of validation set after collisions removal"
    })

    type = "pass"
    if len(final_tags) < 2:
        type = "error"
    elif len(final_tags) != len(selected_tags):
        type = "error"
    report.append({
        "title": "Final training tags",
        "count": len(final_tags),
        "type": type,
        "description": f"If this number differs from the number of selected tags, it means that after data "
                       f"validation and cleaning some of the selected tags "
                       f"{list(set(selected_tags) - set(final_tags))} "
                       f"have 0 examples in the train set. Please restart step 3 and deselect these tags manually"
    })

    cnt_errors = 0
    cnt_warnings = 0
    for item in report:
        if item["type"] == "error":
            cnt_errors += 1
        if item["type"] == "warning":
            cnt_warnings += 1

    fields = [
        {"field": "data.report", "payload": report},
        {"field": "data.done4", "payload": True},
        {"field": "data.cntErrors", "payload": cnt_errors},
        {"field": "data.cntWarnings", "payload": cnt_warnings},
    ]
    if cnt_errors == 0:
        # save selected tags
        gt_labels = {tag_name: idx for idx, tag_name in enumerate(final_tags)}
        sly.json.dump_json_file(gt_labels, os.path.join(g.project_dir, "gt_labels.json"))
        sly.json.dump_json_file(gt_labels, os.path.join(g.info_dir, "gt_labels.json"))

        # save splits
        # final_tags2images[tag_name][split].extend(_final_infos)
        split_paths = defaultdict(list)
        _splits_to_dump = defaultdict(lambda: defaultdict(list))
        for tag_name, splits in final_tags2images.items():
            for split_name, infos in splits.items():
                if state["trainData"] == "images":
                    paths = [input_project.get_paths_by_image_id(info.id) for info in infos]
                else:
                    paths = [input_project_objects.get_paths_by_image_id(info.id) for info in infos]
                split_paths[split_name].extend(paths)
        sly.json.dump_json_file(split_paths, os.path.join(g.project_dir, "splits.json"))

        fields.extend([
            {"field": "state.collapsed5", "payload": False},
            {"field": "state.disabled5", "payload": False},
            {"field": "state.activeStep", "payload": 5},
            {"field": "state.isValidating", "payload": False},
        ])
    g.api.app.set_fields(g.task_id, fields)

def download_project_objects(api: sly.Api, task_id, context, state, app_logger):
    try:
        if not dir_exists(g.project_dir):
            mkdir(g.project_dir)
        project_meta_path = os.path.join(g.project_dir, "meta.json")
        g.project_meta = convert_object_tags(g.project_meta)
        project_meta_json = g.project_meta.to_json()
        dump_json_file(project_meta_json, project_meta_path)

        datasets = api.dataset.get_list(g.project_id)
        for dataset in datasets:
            ds_dir = os.path.join(g.project_dir, dataset.name)
            img_dir = os.path.join(ds_dir, "img")
            ann_dir = os.path.join(ds_dir, "ann")

            mkdir(ds_dir)
            mkdir(img_dir)
            mkdir(ann_dir)

            images_infos = api.image.get_list(dataset.id)
            download_progress = get_progress_cb(progress_index, "Download project",
                                                g.project_info.items_count * 2)
            for batch in sly.batched(images_infos):
                image_ids = [image_info.id for image_info in batch]
                image_names = [image_info.name for image_info in batch]

                ann_infos = api.annotation.download_batch(dataset.id, image_ids,
                                                          progress_cb=download_progress)
                image_nps = api.image.download_nps(dataset.id, image_ids,
                                                   progress_cb=download_progress)
                anns = [sly.Annotation.from_json(ann_info.annotation, g.project_meta)
                        for ann_info in ann_infos]

                selected_classes = get_selected_classes_from_ui(state["classesSelected"])
                crops = crop_and_resize_objects(image_nps, anns, state, selected_classes, image_names)
                crop_nps, crop_anns, crop_names = unpack_crops(crops, image_names)
                crop_anns = copy_tags(crop_anns)

                write_images(crop_nps, crop_names, img_dir)
                dump_anns(crop_anns, crop_names, ann_dir)

        reset_progress(progress_index)

        global project_fs
        project_fs = sly.Project(g.project_dir, sly.OpenMode.READ)

        g.images_infos = create_img_infos(project_fs)
    except Exception as e:
        reset_progress(progress_index)
        raise e

    items_count = g.project_stats["objects"]["total"]["objectsInDataset"]
    train_percent = 80
    train_count = int(items_count / 100 * train_percent)
    random_split = {
        "count": {
            "total": items_count,
            "train": train_count,
            "val": items_count - train_count
        },
        "percent": {
            "total": 100,
            "train": train_percent,
            "val": 100 - train_percent
        },
        "shareImagesBetweenSplits": False,
        "sliderDisabled": False,
    }

    fields = [
        {"field": "data.done1", "payload": True},
        {"field": "state.collapsed2", "payload": False},
        {"field": "state.disabled2", "payload": False},
        {"field": "state.activeStep", "payload": 2},
        {"field": "data.totalImagesCount", "payload": items_count},
        {"field": "state.randomSplit", "payload": random_split},
    ]
    g.api.app.set_fields(g.task_id, fields)

def create_img_infos(project_fs):
    tag_id_map = {
        tag["name"]: tag["id"]
        for tag in project_fs.meta.tag_metas.to_json()
    }
    images_infos = []
    for dataset_fs in project_fs:
        img_info_dir = os.path.join(dataset_fs.directory, "img_info")
        mkdir(img_info_dir)
        for idx, item_name in enumerate(os.listdir(dataset_fs.item_dir)):
            item_ext = get_file_ext(item_name).lstrip(".")
            item_path = os.path.join(dataset_fs.item_dir, item_name)
            item = sly.image.read(item_path)
            h, w = item.shape[:2]
            item_size = os.path.getsize(item_path)
            created_at = datetime.fromtimestamp(
                os.stat(item_path).st_ctime, tz=timezone.utc).strftime("%d-%m-%Y %H:%M:%S")
            modified_at = datetime.fromtimestamp(
                os.stat(item_path).st_mtime, tz=timezone.utc).strftime("%d-%m-%Y %H:%M:%S")
            item_ann_path = os.path.join(dataset_fs.ann_dir, f"{item_name}.json")
            ann_json = load_json_file(item_ann_path)
            ann = sly.Annotation.from_json(ann_json, project_fs.meta)
            tags = ann.img_tags
            tags_json = tags.to_json()
            labels_count = len(ann.labels)

            tags_img_info = []
            for tag in tags_json:
                tag_info = {
                    "entityId": None,
                    "tagId": tag_id_map[tag["name"]],
                    "id": None,
                    "labelerLogin": tag["labelerLogin"],
                    "createdAt": tag["createdAt"],
                    "updatedAt": tag["updatedAt"],
                    "name": tag["name"]
                }
                tags_img_info.append(tag_info)

            item_img_info = {
                "id": idx,
                "name": item_name,
                "link": "",
                "hash": "",
                "mime": f"image/{item_ext}",
                "ext": item_ext,
                "size": item_size,
                "width": w,
                "height": h,
                "labels_count": labels_count,
                "dataset_id": dataset_fs.name,
                "created_at": created_at,
                "updated_at": modified_at,
                "meta": {},
                "path_original": "",
                "full_storage_url": "",
                "tags": tags_img_info
            }
            save_path = os.path.join(img_info_dir, f"{item_name}.json")
            dump_json_file(item_img_info, save_path)
            images_infos.append(item_img_info)
    return images_infos

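# Hypothetical usage sketch (not part of the original source): create_img_infos expects
# a Supervisely project opened from disk; it writes one "<item_name>.json" info file per
# image under each dataset's "img_info" folder and returns the collected dicts.
# The project path below is a placeholder.
def _example_create_img_infos():
    project_fs = sly.Project('/path/to/project', sly.OpenMode.READ)
    return create_img_infos(project_fs)
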
def _create(self):
    mkdir(self.item_dir)

def _prepare_next_dir(self):
    # Each call creates the next zero-padded output subdirectory,
    # e.g. '{:08}'.format(1) -> '00000001'.
    self._idx += 1
    self._subdir = '{:08}'.format(self._idx)
    self._odir = os.path.join(self._base_out_dir, self._subdir)
    sly_fs.mkdir(self._odir)