def export_annotations(task_id, dataset_id, categories):
    """
    Export the annotations of a dataset to a single COCO JSON file.

    The file is written to ``<dataset.directory>.exports/`` and registered
    as an ``ExportModel`` so it is accessible from:
    Datasets -> Chosen Dataset -> Exports

    :param task_id: id of the ``TaskModel`` that receives log lines and
        progress updates for this export
    :param dataset_id: id of the ``DatasetModel`` to export
    :param categories: category selection forwarded unchanged to
        ``collect_coco_annotations``
    """
    task = TaskModel.objects.get(id=task_id)
    dataset = DatasetModel.objects.get(id=dataset_id)

    task.update(status="PROGRESS")
    socket = create_socket()

    task.info("Beginning Export (COCO Format)")

    task.info("===== Getting COCO labels =====")
    coco, category_names = collect_coco_annotations(task, categories, dataset, socket)

    directory = f"{dataset.directory}.exports/"
    file_path = f"{directory}coco-{datetime.now().strftime('%m_%d_%Y__%H_%M_%S_%f')}.json"

    # exist_ok avoids the check-then-create race when two export tasks for
    # the same dataset start at the same time.
    os.makedirs(directory, exist_ok=True)

    task.info(f"Writing export to file {file_path}")
    with open(file_path, 'w') as fp:
        json.dump(coco, fp)

    task.info("Creating export object")
    export = ExportModel(dataset_id=dataset.id, path=file_path, tags=["COCO", *category_names])
    export.save()

    task.set_progress(100, socket=socket)
def export_annotations_to_tf_record(task_id, dataset_id, categories, validation_set_size, test_set_size,
                                    train_shards_number, val_shards_number, test_shards_number):
    """
    Load COCO annotations from the chosen dataset, convert them to
    TF Record format and export them to a single ZIP file accessible from:
    Datasets -> Chosen Dataset -> Exports

    :param task_id: id of the ``TaskModel`` that receives log lines and
        progress updates for this export
    :param dataset_id: id of the ``DatasetModel`` to export
    :param categories: category selection forwarded unchanged to
        ``collect_coco_annotations``
    :param validation_set_size: forwarded to ``convert_coco_to_tfrecord``
    :param test_set_size: forwarded to ``convert_coco_to_tfrecord``
    :param train_shards_number: number of train shards (logged and forwarded)
    :param val_shards_number: number of validation shards (logged and forwarded)
    :param test_shards_number: number of test shards (logged and forwarded)
    """
    task = TaskModel.objects.get(id=task_id)
    dataset = DatasetModel.objects.get(id=dataset_id)

    task.update(status="PROGRESS")
    socket = create_socket()

    task.info("===== Beginning Export (TF Record Format) =====")

    # Getting coco annotations
    task.info("===== Getting COCO labels =====")
    coco, category_names = collect_coco_annotations(task, categories, dataset, socket)

    out_directory = f"{dataset.directory}.exports/"
    image_dir = f"{dataset.directory}"

    # exist_ok avoids the check-then-create race when two export tasks for
    # the same dataset start at the same time.
    os.makedirs(out_directory, exist_ok=True)

    task.info("===== Converting to TF Record =====")
    task.info(f"Number of train shards: {train_shards_number}")
    task.info(f"Number of validation shards: {val_shards_number}")
    task.info(f"Number of test shards: {test_shards_number}")
    tf_records_files_path = convert_coco_to_tfrecord(
        image_dir,
        json.dumps(coco),
        out_directory,
        validation_set_size,
        test_set_size,
        task,
        train_shards_number,
        val_shards_number,
        test_shards_number,
        include_masks=True,
    )
    task.info(f"Created {len(tf_records_files_path)} TF Record files")

    zip_path = f"{out_directory}tf_record_zip-{datetime.now().strftime('%m_%d_%Y__%H_%M_%S_%f')}.zip"
    task.info("Writing TF Records to zip file")
    try:
        with zipfile.ZipFile(zip_path, 'w') as zip_file:
            for tf_record_file in tf_records_files_path:
                # Store each record flat in the archive, without its directory.
                zip_file.write(tf_record_file, os.path.basename(tf_record_file))
    finally:
        # Clean exports: the record files are only intermediate artefacts, so
        # remove them even when zipping fails instead of leaving them behind
        # in the exports directory.
        for tf_record_file in tf_records_files_path:
            os.remove(tf_record_file)

    export = ExportModel(dataset_id=dataset.id, path=zip_path, tags=["TF Record", *category_names])
    export.save()

    task.set_progress(100, socket=socket)
def export_annotations(task_id, dataset_id, categories, with_empty_images=False):
    """
    Export the annotations of a dataset to a single COCO JSON file.

    Non-deleted categories whose id is in ``categories`` and non-deleted
    images of the dataset are collected. For every image, its non-deleted
    annotations in those categories are exported; annotations that have
    neither keypoints nor a segmentation are skipped. The result is written
    to ``<dataset.directory>.exports/coco-<timestamp>.json`` and registered
    as an ``ExportModel``.

    :param task_id: id of the ``TaskModel`` that receives log lines and
        progress updates for this export
    :param dataset_id: id of the ``DatasetModel`` to export
    :param categories: ids of the categories to export
    :param with_empty_images: when true, images without any annotation in
        the selected categories are still listed in the export; otherwise
        they are left out
    """
    # NOTE(review): an `export_annotations` with a three-argument signature is
    # defined earlier in this file; if both live in the same module this later
    # definition rebinds the name - confirm which one is intended.
    task = TaskModel.objects.get(id=task_id)
    dataset = DatasetModel.objects.get(id=dataset_id)

    task.update(status="PROGRESS")
    socket = create_socket()

    task.info("Beginning Export (COCO Format)")

    db_categories = CategoryModel.objects(id__in=categories, deleted=False) \
        .only(*CategoryModel.COCO_PROPERTIES)
    db_images = ImageModel.objects(deleted=False, dataset_id=dataset.id) \
        .only(*ImageModel.COCO_PROPERTIES)
    db_annotations = AnnotationModel.objects(deleted=False, category_id__in=categories)

    coco = {'images': [], 'categories': [], 'annotations': []}

    # Progress is tracked per processed category and per processed image.
    total_items = db_categories.count() + db_images.count()
    progress = 0

    # iterate through all categories and add them to the export
    category_names = []
    for category in fix_ids(db_categories):
        if len(category.get('keypoint_labels', [])) > 0:
            # Rename the internal keypoint fields to their COCO names.
            category['keypoints'] = category.pop('keypoint_labels', [])
            category['skeleton'] = category.pop('keypoint_edges', [])
        else:
            # No keypoints: drop the internal fields from the export.
            category.pop('keypoint_edges', None)
            category.pop('keypoint_labels', None)

        task.info(f"Adding category: {category.get('name')}")
        coco['categories'].append(category)
        category_names.append(category.get('name'))

        progress += 1
        task.set_progress((progress / total_items) * 100, socket=socket)

    for db_image in db_images:
        image = fix_ids(db_image)

        progress += 1
        task.set_progress((progress / total_items) * 100, socket=socket)

        annotations = db_annotations.filter(image_id=image.get('id')) \
            .only(*AnnotationModel.COCO_PROPERTIES)
        annotations = fix_ids(annotations)

        if len(annotations) == 0:
            if with_empty_images:
                coco['images'].append(image)
            continue

        num_annotations = 0
        for annotation in annotations:
            has_keypoints = len(annotation.get('keypoints', [])) > 0
            has_segmentation = len(annotation.get('segmentation', [])) > 0

            if not (has_keypoints or has_segmentation):
                # Nothing exportable on this annotation.
                continue

            if has_keypoints:
                # COCO keypoints are flat [x, y, v] triplets; every third
                # value starting at index 2 is the visibility flag, and
                # num_keypoints counts the entries whose flag is > 0.
                visibility = np.array(annotation.get('keypoints', []))[2::3]
                annotation['num_keypoints'] = len(visibility[visibility > 0])
            else:
                # Remove an empty keypoints field rather than exporting it.
                annotation.pop('keypoints', None)

            num_annotations += 1
            coco['annotations'].append(annotation)

        task.info(
            f"Exporting {num_annotations} annotations for image {image.get('id')}"
        )
        coco['images'].append(image)

    # Report what was actually exported. The annotation query is not limited
    # to this dataset and skipped annotations/images would be counted too, so
    # database counts would overstate the export.
    exported_annotations = len(coco['annotations'])
    exported_images = len(coco['images'])
    task.info(
        f"Done export {exported_annotations} annotations and {exported_images} images from {dataset.name}"
    )

    timestamp = time.time()
    directory = f"{dataset.directory}.exports/"
    file_path = f"{directory}coco-{timestamp}.json"

    # exist_ok avoids the check-then-create race when two export tasks for
    # the same dataset start at the same time.
    os.makedirs(directory, exist_ok=True)

    task.info(f"Writing export to file {file_path}")
    with open(file_path, 'w') as fp:
        json.dump(coco, fp)

    task.info("Creating export object")
    export = ExportModel(dataset_id=dataset.id, path=file_path, tags=["COCO", *category_names])
    export.save()

    task.set_progress(100, socket=socket)