def _read(self):
    """Scan the image and annotation directories of this dataset and cache
    the mapping ``item name -> image extension`` in ``self._items_exts``.

    Raises:
        FileNotFoundError: if either directory is missing.
        RuntimeError: if the dataset is empty or image/annotation names
            do not match one-to-one.
    """
    # Fail fast on missing directories before touching the filesystem further.
    if not dir_exists(self.img_dir):
        raise FileNotFoundError('Image directory not found: {!r}'.format(
            self.img_dir))
    if not dir_exists(self.ann_dir):
        raise FileNotFoundError('Annotation directory not found: {!r}'.format(
            self.ann_dir))

    annotation_files = list_files(self.ann_dir, [ANN_EXT])
    image_files = list_files(self.img_dir, image.SUPPORTED_IMG_EXTS)

    names_with_ann = {get_file_name(p) for p in annotation_files}
    # Remember each image's extension keyed by its bare name.
    name_to_ext = {}
    for p in image_files:
        name_to_ext[get_file_name(p)] = get_file_ext(p)

    if not name_to_ext or not names_with_ann:
        raise RuntimeError('Dataset {!r} is empty'.format(self.name))
    # Every image must have exactly one matching annotation and vice versa.
    if names_with_ann != set(name_to_ext):
        raise RuntimeError(
            'File names in dataset {!r} are inconsistent'.format(self.name))

    self._items_exts = name_to_ext
def get_image_and_ann():
    """Download the current image, convert it to PNG and write its
    Cityscapes artifacts (polygons JSON, color mask, label mask) next to it.

    Operates entirely on names from the enclosing scope (``image_dir_path``,
    ``ann_dir``, ``image_name``, ``image_id``, ``ann``, ``base_image_name``,
    ``train_val_flag`` ...).
    """
    mkdir(image_dir_path)
    mkdir(ann_dir)
    image_path = os.path.join(image_dir_path, image_name)
    api.image.download_path(image_id, image_path)
    image_ext_to_png(image_path)

    mask_color, mask_label, poly_json = from_ann_to_cityscapes_mask(
        ann, name2id, app_logger, train_val_flag)

    # All three Cityscapes files share one base name with the
    # '_leftImg8bit' marker stripped; compute it once.
    base = get_file_name(base_image_name).replace('_leftImg8bit', '')
    dump_json_file(poly_json,
                   os.path.join(ann_dir, base + cityscapes_polygons_suffix))
    write(os.path.join(ann_dir, base + cityscapes_color_suffix), mask_color)
    write(os.path.join(ann_dir, base + cityscapes_labels_suffix), mask_label)
def save_project_as_pascal_voc_detection(save_path, project: Project):
    """Export ``project`` to a Pascal VOC detection layout under ``save_path``.

    Each dataset becomes ``<save_path>/<dataset.name>/`` with ``JPEGImages``,
    ``Annotations`` (one XML per item, bounding boxes only — every geometry is
    reduced via ``to_bbox()``) and ``ImageSets/Layout`` image lists grouped by
    image tag.
    """
    # Create root pascal 'datasets' folders
    for dataset in project.datasets:
        pascal_dataset_path = os.path.join(save_path, dataset.name)
        pascal_dataset_relative_path = os.path.relpath(pascal_dataset_path,
                                                       save_path)
        images_dir = os.path.join(pascal_dataset_path, 'JPEGImages')
        anns_dir = os.path.join(pascal_dataset_path, 'Annotations')
        lists_dir = os.path.join(pascal_dataset_path, 'ImageSets/Layout')
        fs_utils.mkdir(pascal_dataset_path)
        for subdir in [
                'ImageSets',  # Train list, Val list, etc.
                'ImageSets/Layout',
                'Annotations',
                'JPEGImages'
        ]:
            fs_utils.mkdir(os.path.join(pascal_dataset_path, subdir))

        samples_by_tags = defaultdict(list)  # TRAIN: [img_1, img2, ..]

        for item_name in dataset:
            img_path, ann_path = dataset.get_item_paths(item_name)
            no_ext_name = fs_utils.get_file_name(item_name)
            pascal_img_path = os.path.join(images_dir,
                                           no_ext_name + OUT_IMG_EXT)
            pascal_ann_path = os.path.join(anns_dir, no_ext_name + XML_EXT)

            # Copy as-is when the source already has the target extension;
            # otherwise re-encode through read/write.
            if item_name.endswith(OUT_IMG_EXT):
                fs_utils.copy_file(img_path, pascal_img_path)
            else:
                img = image_utils.read(img_path)
                image_utils.write(pascal_img_path, img)

            ann = Annotation.load_json_file(ann_path,
                                            project_meta=project.meta)

            # Read tags for images lists generation
            for tag in ann.img_tags:
                samples_by_tags[tag.name].append(
                    (no_ext_name, len(ann.labels)))

            # NOTE: ann.img_size is (height, width) — hence the swap below.
            writer = pascal_voc_writer.Writer(
                path=pascal_dataset_relative_path,
                width=ann.img_size[1],
                height=ann.img_size[0])

            for label in ann.labels:
                obj_class = label.obj_class
                rect: Rectangle = label.geometry.to_bbox()
                writer.addObject(name=obj_class.name,
                                 xmin=rect.left,
                                 ymin=rect.top,
                                 xmax=rect.right,
                                 ymax=rect.bottom)
            writer.save(pascal_ann_path)

        save_images_lists(lists_dir, samples_by_tags)
def gen_video_stream_name(file_name, stream_index):
    '''
    Create name to video stream from given filename and index of stream

    :param file_name: str
    :param stream_index: int
    :return: str
    '''
    base = get_file_name(file_name)
    extension = get_file_ext(file_name)
    # A 5-char random chunk keeps names from different runs unique.
    return "{}_stream_{}_{}{}".format(base, stream_index, rand_str(5),
                                      extension)
def generate_names(base_name, count):
    """Return ``count`` names: ``base_name`` itself followed by numbered
    variants ``<stem>_01<ext>``, ``<stem>_02<ext>``, ...
    """
    stem = sly_fs.get_file_name(base_name)
    extension = sly_fs.get_file_ext(base_name)
    return [base_name] + [
        '{}_{:02d}{}'.format(stem, i, extension) for i in range(1, count)
    ]
def generate_free_name(used_names, possible_name, with_ext=False):
    """Return ``possible_name`` if unused, otherwise the first variant
    ``<name>_NN`` (or ``<stem>_NN<ext>`` when ``with_ext``) not in
    ``used_names``.

    :param used_names: iterable of names already taken
    :param possible_name: the desired name
    :param with_ext: if True, insert the counter before the file extension
    :return: a name guaranteed not to be in ``used_names``
    """
    # Fix: build the lookup set once. The original rebuilt set(used_names)
    # on every while-iteration (O(n) per membership test), and would also
    # misbehave if used_names were a one-shot iterator.
    taken = set(used_names)
    res_name = possible_name
    new_suffix = 1
    while res_name in taken:
        if with_ext is True:
            res_name = '{}_{:02d}{}'.format(
                sly_fs.get_file_name(possible_name), new_suffix,
                sly_fs.get_file_ext(possible_name))
        else:
            res_name = '{}_{:02d}'.format(possible_name, new_suffix)
        new_suffix += 1
    return res_name
def _get_free_name(exist_check_fn, name):
    """Return ``name`` if ``exist_check_fn`` says it is free, otherwise try
    ``<stem>_001<ext>``, ``<stem>_002<ext>``, ... until one is free.
    """
    stem = get_file_name(name)
    extension = get_file_ext(name)
    candidate = name
    counter = 1
    while exist_check_fn(candidate):
        candidate = '{}_{:03d}{}'.format(stem, counter, extension)
        counter += 1
    return candidate
def upload_dtl_archive(self, task_id, archive_path, progress_cb=None):
    """Upload a DTL tar archive for ``task_id`` as a streaming multipart
    request, reporting progress in megabytes via ``progress_cb``.

    :param task_id: id of the task the archive belongs to
    :param archive_path: local path of the .tar archive to upload
    :param progress_cb: optional callable receiving megabytes read so far

    Fix: the archive file handle was opened and never closed; the ``with``
    block now guarantees it is released even if the upload raises.
    """
    with open(archive_path, 'rb') as archive_file:
        encoder = MultipartEncoder({
            'id': str(task_id).encode('utf-8'),
            'name': get_file_name(archive_path),
            'archive': (os.path.basename(archive_path), archive_file,
                        'application/x-tar')
        })

        def callback(monitor):
            read_mb = monitor.bytes_read / 1024.0 / 1024.0
            if progress_cb is not None:
                progress_cb(read_mb)

        monitor = MultipartEncoderMonitor(encoder, callback)
        self.api.post('tasks.upload.dtl_archive', monitor)
def get_free_name(self, team_id, path):
    """Return ``path`` if it does not yet exist in Team Files for
    ``team_id``; otherwise append '_000', '_001', ... before the extension
    until a free remote path is found.
    """
    parent = Path(path).parent
    stem = get_file_name(path)
    ext = get_file_ext(path)

    def _build(counter=None):
        # Remote paths are joined with '/' explicitly, independent of the OS.
        candidate = "{}/{}".format(parent, stem)
        if counter is not None:
            candidate += "_{:03d}".format(counter)
        if ext:
            candidate += "{}".format(ext)
        return candidate

    attempt = 0
    candidate_path = _build()
    while self.exists(team_id, candidate_path):
        candidate_path = _build(attempt)
        attempt += 1
    return candidate_path
def convert_project(dest_dir, result_dir, app_logger):
    """Convert a Supervisely video project at ``dest_dir`` into MOT-style
    sequence folders under ``result_dir``.

    For every video annotation: videos that contain figures go to
    ``<ds>/train/<video>/`` with per-class ``gt_*.txt`` files; videos with no
    figures go to ``<ds>/test/<video>/``. Each sequence gets a seqinfo file
    and its frames extracted with OpenCV.
    """
    datasets_paths = glob(dest_dir + "/*/")
    if len(datasets_paths) == 0:
        g.logger.warn('There are no datasets in project')
    meta_json = sly.json.load_json_file(os.path.join(dest_dir, 'meta.json'))
    meta = sly.ProjectMeta.from_json(meta_json)
    for ds_path in datasets_paths:
        # ds_path ends with '/', so [-2] is the dataset directory name.
        ds_name = ds_path.split('/')[-2]
        anns_paths = glob(ds_path + "ann" + "/*")
        progress = sly.Progress('Processing Video', len(anns_paths),
                                app_logger)
        for ann_path in anns_paths:
            ann_json = sly.json.load_json_file(ann_path)
            ann = sly.VideoAnnotation.from_json(ann_json, meta)
            # Annotation files are '<video file name>.json', so stripping one
            # extension yields the video file name — TODO confirm naming.
            video_name = sly.io.fs.get_file_name(ann_path)
            video_path = os.path.join(ds_path, "video", video_name)
            video_info = sly.video.get_info(video_path)['streams'][0]
            curr_objs_geometry_types = [
                obj.obj_class.geometry_type for obj in ann.objects
            ]
            # Optionally skip videos without rectangles when the app was
            # configured to export only rectangle shapes.
            if os.environ['modal.state.shapes'] == "false" and Rectangle not in curr_objs_geometry_types:
                g.logger.warn(
                    'Video {} does not contain figures with shape Rectangle'.format(video_name))
                continue
            if len(ann.figures) > 0:
                # Annotated videos become training sequences.
                result_images = os.path.join(result_dir, ds_name, "train",
                                             get_file_name(video_name),
                                             g.images_dir_name)
                result_anns = os.path.join(result_dir, ds_name, "train",
                                           get_file_name(video_name),
                                           g.ann_dir_name)
                seq_path = os.path.join(result_dir, ds_name, "train",
                                        get_file_name(video_name), g.seq_name)
                mkdir(result_images)
                mkdir(result_anns)
            if len(ann.figures) == 0:
                # Unannotated videos become test sequences.
                # NOTE(review): result_anns stays unset on this branch; safe
                # only because the figure loop below writes nothing when
                # there are no figures.
                result_images = os.path.join(result_dir, ds_name, "test",
                                             get_file_name(video_name),
                                             g.images_dir_name)
                seq_path = os.path.join(result_dir, ds_name, "test",
                                        get_file_name(video_name), g.seq_name)
                mkdir(result_images)
            # NOTE(review): opened in append mode — re-running the conversion
            # over the same result_dir duplicates the seqinfo content.
            with open(seq_path, 'a') as f:
                f.write('[Sequence]\n')
                f.write('name={}\n'.format(get_file_name(video_name)))
                f.write('imDir={}\n'.format(g.images_dir_name))
                # Frame rate reconstructed from the timecode of frame 1.
                f.write('frameRate={}\n'.format(
                    round(1 / video_info['framesToTimecodes'][1])))
                f.write('seqLength={}\n'.format(video_info['framesCount']))
                f.write('imWidth={}\n'.format(video_info['width']))
                f.write('imHeight={}\n'.format(video_info['height']))
                f.write('imExt={}\n'.format(g.image_ext))
            # MOT object ids are 1-based and assigned in object order.
            id_to_video_obj = {}
            for idx, curr_video_obj in enumerate(ann.objects):
                id_to_video_obj[curr_video_obj] = idx + 1
            for frame_index, frame in enumerate(ann.frames):
                for figure in frame.figures:
                    if os.environ['modal.state.shapes'] == "false" and figure.video_object.obj_class.geometry_type != Rectangle:
                        continue
                    rectangle_geom = figure.geometry.to_bbox()
                    left = rectangle_geom.left
                    top = rectangle_geom.top
                    width = rectangle_geom.width
                    height = rectangle_geom.height
                    # conf 0 marks frames covered by the 'ignore' tag range.
                    conf_val = 1
                    for curr_tag in figure.video_object.tags:
                        if g.conf_tag_name == curr_tag.name and (
                                curr_tag.frame_range is None or frame_index
                                in range(curr_tag.frame_range[0],
                                         curr_tag.frame_range[1] + 1)):
                            conf_val = 0
                    # MOT gt line: frame,id,left,top,w,h,conf,-1,-1,-1
                    # (frames are 1-based; w/h adjusted by -1 — presumably an
                    # inclusive-pixel convention, TODO confirm).
                    curr_gt_data = '{},{},{},{},{},{},{},{},{},{}\n'.format(
                        frame_index + 1, id_to_video_obj[figure.video_object],
                        left, top, width - 1, height - 1, conf_val, -1, -1,
                        -1)
                    filename = 'gt_{}.txt'.format(
                        figure.parent_object.obj_class.name)
                    with open(os.path.join(result_anns, filename),
                              'a') as f:  # gt_path
                        f.write(curr_gt_data)
                if frame_index == ann.frames_count:
                    break
            # Extract every frame as a zero-padded numbered image, 1-based.
            vidcap = cv2.VideoCapture(video_path)
            success, image = vidcap.read()
            count = 1
            while success:
                image_name = str(count).zfill(6) + g.image_ext
                image_path = os.path.join(result_images, image_name)
                cv2.imwrite(image_path, image)
                success, image = vidcap.read()
                count += 1
            progress.iter_done_report()
def from_sl_to_cityscapes(api: sly.Api, task_id, context, state, app_logger):
    """Export a Supervisely project to Cityscapes format and upload the
    resulting tar.gz archive to Team Files.

    Images/annotations are split into train/val/test using (in priority
    order) a 'split' image tag, a train/val/test separator tag, or a
    round-robin splitter derived from the tag statistics.
    """
    def get_image_and_ann():
        # Download the current image, convert it to PNG and write its
        # Cityscapes polygons JSON, color mask and label mask. Uses names
        # from the enclosing loop iteration (image_id, image_name, ann, ...).
        mkdir(image_dir_path)
        mkdir(ann_dir)
        image_path = os.path.join(image_dir_path, image_name)
        api.image.download_path(image_id, image_path)
        image_ext_to_png(image_path)
        mask_color, mask_label, poly_json = from_ann_to_cityscapes_mask(
            ann, name2id, app_logger, train_val_flag)
        # Output files share the base name with '_leftImg8bit' stripped.
        dump_json_file(
            poly_json,
            os.path.join(
                ann_dir,
                get_file_name(base_image_name).replace('_leftImg8bit', '') +
                cityscapes_polygons_suffix))
        write(
            os.path.join(
                ann_dir,
                get_file_name(base_image_name).replace('_leftImg8bit', '') +
                cityscapes_color_suffix), mask_color)
        write(
            os.path.join(
                ann_dir,
                get_file_name(base_image_name).replace('_leftImg8bit', '') +
                cityscapes_labels_suffix), mask_label)

    project_name = api.project.get_info_by_id(PROJECT_ID).name
    ARCHIVE_NAME = '{}_{}_Cityscapes.tar.gz'.format(PROJECT_ID, project_name)
    meta_json = api.project.get_meta(PROJECT_ID)
    meta = sly.ProjectMeta.from_json(meta_json)

    # Cityscapes supports only bitmap/polygon geometry; refuse the export
    # if the project has none of those.
    has_bitmap_poly_shapes = False
    for obj_class in meta.obj_classes:
        if obj_class.geometry_type not in possible_geometries:
            app_logger.warn(
                f'Cityscapes format supports only bitmap and polygon classes, {obj_class.geometry_type} will be skipped'
            )
        else:
            has_bitmap_poly_shapes = True

    if has_bitmap_poly_shapes is False:
        raise Exception(
            'Input project does not contain bitmap or polygon classes')
        # NOTE(review): unreachable — the raise above always exits first.
        my_app.stop()

    RESULT_ARCHIVE = os.path.join(my_app.data_dir, ARCHIVE_NAME)
    RESULT_DIR = os.path.join(my_app.data_dir, RESULT_DIR_NAME)
    result_images_train = os.path.join(RESULT_DIR, images_dir_name,
                                       default_dir_train)
    result_images_val = os.path.join(RESULT_DIR, images_dir_name,
                                     default_dir_val)
    result_images_test = os.path.join(RESULT_DIR, images_dir_name,
                                      default_dir_test)
    result_anns_train = os.path.join(RESULT_DIR, annotations_dir_name,
                                     default_dir_train)
    result_anns_val = os.path.join(RESULT_DIR, annotations_dir_name,
                                   default_dir_val)
    result_anns_test = os.path.join(RESULT_DIR, annotations_dir_name,
                                    default_dir_test)
    sly.fs.mkdir(RESULT_DIR)
    app_logger.info("Cityscapes Dataset folder has been created")

    # Build the class index: ids are 1-based positions in the project meta;
    # name2id maps to an (id, id, id) triple consumed by the mask writer.
    class_to_id = []
    name2id = {}
    for idx, obj_class in enumerate(meta.obj_classes):
        if obj_class.geometry_type not in possible_geometries:
            continue
        curr_class = {}
        curr_class['name'] = obj_class.name
        curr_class['id'] = idx + 1
        curr_class['color'] = obj_class.color
        class_to_id.append(curr_class)
        name2id[obj_class.name] = (idx + 1, idx + 1, idx + 1)
    dump_json_file(class_to_id, os.path.join(RESULT_DIR, 'class_to_id.json'))
    app_logger.info("Writing classes with colors to class_to_id.json file")

    datasets = api.dataset.get_list(PROJECT_ID)
    for dataset in datasets:
        images_dir_path_train = os.path.join(result_images_train,
                                             dataset.name)
        images_dir_path_val = os.path.join(result_images_val, dataset.name)
        images_dir_path_test = os.path.join(result_images_test, dataset.name)
        anns_dir_path_train = os.path.join(result_anns_train, dataset.name)
        anns_dir_path_val = os.path.join(result_anns_val, dataset.name)
        anns_dir_path_test = os.path.join(result_anns_test, dataset.name)

        images = api.image.get_list(dataset.id)
        progress = sly.Progress(
            'Convert images and anns from dataset {}'.format(dataset.name),
            len(images), app_logger)
        if len(images) < 3:
            app_logger.warn(
                'Number of images in {} dataset is less then 3, val and train directories for this dataset will not be created'
                .format(dataset.name))

        image_ids = [image_info.id for image_info in images]
        base_image_names = [image_info.name for image_info in images]
        # Target names follow the Cityscapes convention:
        # '<name>_leftImg8bit<ext>' with any pre-existing marker deduplicated.
        image_names = [
            get_file_name(image_info.name.replace('_leftImg8bit', '')) + \
            cityscapes_images_suffix + get_file_ext(image_info.name) for
            image_info in images
        ]

        ann_infos = api.annotation.download_batch(dataset.id, image_ids)
        anns = [
            sly.Annotation.from_json(ann_info.annotation, meta)
            for ann_info in ann_infos
        ]

        splitter = get_tags_splitter(anns)
        curr_splitter = {'train': 0, 'val': 0, 'test': 0}

        for ann, image_id, image_name, base_image_name in zip(
                anns, image_ids, image_names, base_image_names):
            train_val_flag = True
            try:
                # Priority 1: an explicit 'split' tag on the image.
                # NOTE(review): bare except used as control flow — when the
                # tag is absent, .get() returns None and .value raises
                # AttributeError; the bare except also hides unrelated
                # errors, consider narrowing to AttributeError.
                split_name = ann.img_tags.get('split').value
                if split_name == 'train':
                    image_dir_path = images_dir_path_train
                    ann_dir = anns_dir_path_train
                elif split_name == 'val':
                    image_dir_path = images_dir_path_val
                    ann_dir = anns_dir_path_val
                else:
                    image_dir_path = images_dir_path_test
                    ann_dir = anns_dir_path_test
                    train_val_flag = False
            except:
                # Priority 2: a train/val/test separator tag on the image.
                ann_tags = [tag.name for tag in ann.img_tags]
                separator_tags = list(set(ann_tags) & set(possible_tags))
                if len(separator_tags) > 1:
                    app_logger.warn(
                        '''There are more then one separator tag for {} image.
{} tag will be used for split'''.format(
                            image_name, separator_tags[0]))
                if len(separator_tags) >= 1:
                    if separator_tags[0] == 'train':
                        image_dir_path = images_dir_path_train
                        ann_dir = anns_dir_path_train
                    elif separator_tags[0] == 'val':
                        image_dir_path = images_dir_path_val
                        ann_dir = anns_dir_path_val
                    else:
                        image_dir_path = images_dir_path_test
                        ann_dir = anns_dir_path_test
                        train_val_flag = False
                if len(separator_tags) == 0:
                    # Priority 3: round-robin according to splitter quotas.
                    if curr_splitter['test'] == splitter['test']:
                        curr_splitter = {'train': 0, 'val': 0, 'test': 0}
                    if curr_splitter['train'] < splitter['train']:
                        curr_splitter['train'] += 1
                        image_dir_path = images_dir_path_train
                        ann_dir = anns_dir_path_train
                    elif curr_splitter['val'] < splitter['val']:
                        curr_splitter['val'] += 1
                        image_dir_path = images_dir_path_val
                        ann_dir = anns_dir_path_val
                    elif curr_splitter['test'] < splitter['test']:
                        curr_splitter['test'] += 1
                        image_dir_path = images_dir_path_test
                        ann_dir = anns_dir_path_test
                        train_val_flag = False

            get_image_and_ann()
            progress.iter_done_report()

    sly.fs.archive_directory(RESULT_DIR, RESULT_ARCHIVE)
    app_logger.info("Result directory is archived")

    upload_progress = []
    remote_archive_path = "/cityscapes_format/{}/{}".format(
        task_id, ARCHIVE_NAME)

    def _print_progress(monitor, upload_progress):
        # Lazily create the progress bar on the first callback, when the
        # total upload size (monitor.len) is known.
        if len(upload_progress) == 0:
            upload_progress.append(
                sly.Progress(message="Upload {!r}".format(ARCHIVE_NAME),
                             total_cnt=monitor.len,
                             ext_logger=app_logger,
                             is_size=True))
        upload_progress[0].set_current_value(monitor.bytes_read)

    file_info = api.file.upload(
        team_id=TEAM_ID,
        src=RESULT_ARCHIVE,
        dst=remote_archive_path,
        progress_cb=lambda m: _print_progress(m, upload_progress))
    app_logger.info("Uploaded to Team-Files: {!r}".format(
        file_info.full_storage_url))
    api.task.set_output_archive(task_id,
                                file_info.id,
                                ARCHIVE_NAME,
                                file_url=file_info.full_storage_url)
    my_app.stop()
def import_cityscapes(api: sly.Api, task_id, context, state, app_logger):
    """Import a Cityscapes-format archive/directory from Team Files into a
    new Supervisely project.

    Reads gtFine polygon JSONs and leftImg8bit images, creates one dataset
    per city directory, converts polygons (with 'out of roi' holes) to
    Supervisely polygons, and tags every image with a 'split' tag.

    Fixes vs. previous revision:
      * the "No such file" error message lost the file name (format string
        had no placeholder);
      * the annotation JSON file handle was opened and never closed;
      * log message typo 'not found is dataset' -> 'not found in dataset'.
    """
    tag_metas = sly.TagMetaCollection()
    obj_classes = sly.ObjClassCollection()
    dataset_names = []

    storage_dir = my_app.data_dir
    # Resolve local paths depending on whether the input is a directory or
    # a single archive file in Team Files.
    if INPUT_DIR:
        cur_files_path = INPUT_DIR
        extract_dir = os.path.join(
            storage_dir, str(Path(cur_files_path).parent).lstrip("/"))
        input_dir = os.path.join(extract_dir, Path(cur_files_path).name)
        archive_path = os.path.join(storage_dir, cur_files_path + ".tar")
        project_name = Path(cur_files_path).name
    else:
        cur_files_path = INPUT_FILE
        extract_dir = os.path.join(storage_dir, get_file_name(cur_files_path))
        archive_path = os.path.join(storage_dir,
                                    get_file_name_with_ext(cur_files_path))
        project_name = get_file_name(INPUT_FILE)
        input_dir = os.path.join(storage_dir, get_file_name(cur_files_path))

    api.file.download(TEAM_ID, cur_files_path, archive_path)
    if tarfile.is_tarfile(archive_path):
        with tarfile.open(archive_path) as archive:
            archive.extractall(extract_dir)
    else:
        # Fix: include the offending path in the message (the placeholder
        # was missing, so .format() had no effect).
        raise Exception("No such file {}".format(INPUT_FILE))

    new_project = api.project.create(WORKSPACE_ID,
                                     project_name,
                                     change_name_if_conflict=True)

    # Detect which split folders exist under gtFine to decide whether the
    # train set must be re-split into train/val locally.
    tags_template = os.path.join(input_dir, "gtFine", "*")
    tags_paths = glob.glob(tags_template)
    tags = [os.path.basename(tag_path) for tag_path in tags_paths]
    if train_tag in tags and val_tag not in tags:
        split_train = True
    elif trainval_tag in tags and val_tag not in tags:
        split_train = True
    else:
        split_train = False

    search_fine = os.path.join(input_dir, "gtFine", "*", "*",
                               "*_gt*_polygons.json")
    files_fine = glob.glob(search_fine)
    files_fine.sort()
    search_imgs = os.path.join(input_dir, "leftImg8bit", "*", "*",
                               "*_leftImg8bit" + IMAGE_EXT)
    files_imgs = glob.glob(search_imgs)
    files_imgs.sort()
    if len(files_fine) == 0 or len(files_imgs) == 0:
        raise Exception('Input cityscapes format not correct')

    samples_count = len(files_fine)
    progress = sly.Progress('Importing images', samples_count)
    images_pathes_for_compare = []
    images_pathes = {}
    images_names = {}
    anns_data = {}
    ds_name_to_id = {}

    if samples_count > 2:
        random_train_indexes = get_split_idxs(samples_count, samplePercent)

    for idx, orig_ann_path in enumerate(files_fine):
        parent_dir, json_filename = os.path.split(
            os.path.abspath(orig_ann_path))
        # City directory name becomes the dataset name.
        dataset_name = os.path.basename(parent_dir)
        if dataset_name not in dataset_names:
            dataset_names.append(dataset_name)
            ds = api.dataset.create(new_project.id,
                                    dataset_name,
                                    change_name_if_conflict=True)
            ds_name_to_id[dataset_name] = ds.id
            images_pathes[dataset_name] = []
            images_names[dataset_name] = []
            anns_data[dataset_name] = []

        orig_img_path = json_path_to_image_path(orig_ann_path)
        images_pathes_for_compare.append(orig_img_path)
        if not file_exists(orig_img_path):
            logger.warn(
                'Image for annotation {} not found in dataset {}'.format(
                    orig_ann_path.split('/')[-1], dataset_name))
            continue
        images_pathes[dataset_name].append(orig_img_path)
        images_names[dataset_name].append(
            sly.io.fs.get_file_name_with_ext(orig_img_path))

        # The split folder (train/val/test) is two levels above the file.
        tag_path = os.path.split(parent_dir)[0]
        train_val_tag = os.path.basename(tag_path)
        if split_train is True and samples_count > 2:
            # Re-split train(val) samples randomly into train/val.
            if (train_val_tag == train_tag) or (train_val_tag
                                                == trainval_tag):
                if idx in random_train_indexes:
                    train_val_tag = train_tag
                else:
                    train_val_tag = val_tag

        tag_meta = sly.TagMeta('split', sly.TagValueType.ANY_STRING)
        if not tag_metas.has_key(tag_meta.name):
            tag_metas = tag_metas.add(tag_meta)
        tag = sly.Tag(meta=tag_meta, value=train_val_tag)

        # Fix: close the annotation file instead of leaking the handle.
        with open(orig_ann_path) as ann_file:
            json_data = json.load(ann_file)
        ann = sly.Annotation.from_img_path(orig_img_path)

        for obj in json_data['objects']:
            class_name = obj['label']
            if class_name == 'out of roi':
                # Cityscapes convention: first 5 points are the exterior,
                # the rest describe the hole.
                polygon = obj['polygon'][:5]
                interiors = [obj['polygon'][5:]]
            else:
                polygon = obj['polygon']
                if len(polygon) < 3:
                    logger.warn(
                        'Polygon must contain at least 3 points in ann {}, obj_class {}'
                        .format(orig_ann_path, class_name))
                    continue
                interiors = []
            interiors = [convert_points(interior) for interior in interiors]
            polygon = sly.Polygon(convert_points(polygon), interiors)
            if city_classes_to_colors.get(class_name, None):
                obj_class = sly.ObjClass(
                    name=class_name,
                    geometry_type=sly.Polygon,
                    color=city_classes_to_colors[class_name])
            else:
                # Unknown class: pick a fresh color not used yet.
                new_color = generate_rgb(city_colors)
                city_colors.append(new_color)
                obj_class = sly.ObjClass(name=class_name,
                                         geometry_type=sly.Polygon,
                                         color=new_color)
            ann = ann.add_label(sly.Label(polygon, obj_class))
            if not obj_classes.has_key(class_name):
                obj_classes = obj_classes.add(obj_class)

        ann = ann.add_tag(tag)
        anns_data[dataset_name].append(ann)
        progress.iter_done_report()

    out_meta = sly.ProjectMeta(obj_classes=obj_classes, tag_metas=tag_metas)
    api.project.update_meta(new_project.id, out_meta.to_json())

    for ds_name, ds_id in ds_name_to_id.items():
        dst_image_infos = api.image.upload_paths(ds_id,
                                                 images_names[ds_name],
                                                 images_pathes[ds_name])
        dst_image_ids = [img_info.id for img_info in dst_image_infos]
        api.annotation.upload_anns(dst_image_ids, anns_data[ds_name])

    stat_dct = {
        'samples': samples_count,
        'src_ann_cnt': len(files_fine),
        'src_img_cnt': len(files_imgs)
    }
    logger.info('Found img/ann pairs.', extra=stat_dct)
    images_without_anns = set(files_imgs) - set(images_pathes_for_compare)
    if len(images_without_anns) > 0:
        logger.warn('Found source images without corresponding annotations:')
        for im_path in images_without_anns:
            logger.warn('Annotation not found {}'.format(im_path))

    logger.info('Found classes.',
                extra={
                    'cnt':
                    len(obj_classes),
                    'classes':
                    sorted([obj_class.name for obj_class in obj_classes])
                })
    logger.info('Created tags.',
                extra={
                    'cnt':
                    len(out_meta.tag_metas),
                    'tags':
                    sorted([
                        tag_meta.name for tag_meta in out_meta.tag_metas
                    ])
                })
    my_app.stop()
def gen_video_stream_name(file_name, stream_index):
    """Compose a per-stream file name of the form
    '<base>_stream_<index>_<random5><ext>' from ``file_name``.
    """
    parts = (get_file_name(file_name), stream_index, rand_str(5),
             get_file_ext(file_name))
    return "{}_stream_{}_{}{}".format(*parts)