def _create_thread(tid, data):
    slogger.glob.info("create task #{}".format(tid))

    db_task = models.Task.objects.select_for_update().get(pk=tid)
    db_data = db_task.data
    if db_task.data.size != 0:
        raise NotImplementedError("Adding more data is not implemented")

    upload_dir = db_data.get_upload_dirname()

    if data['remote_files']:
        data['remote_files'] = _download_data(data['remote_files'], upload_dir)

    manifest_file = []
    media = _count_files(data, manifest_file)
    media, task_mode = _validate_data(media, manifest_file)

    if manifest_file:
        assert settings.USE_CACHE and db_data.storage_method == StorageMethodChoice.CACHE, \
            "File with meta information can be uploaded if 'Use cache' option is also selected"

    if data['server_files']:
        if db_data.storage == StorageChoice.LOCAL:
            _copy_data_from_share(data['server_files'], upload_dir)
        else:
            upload_dir = settings.SHARE_ROOT

    av_scan_paths(upload_dir)

    job = rq.get_current_job()
    job.meta['status'] = 'Media files are being extracted...'
    job.save_meta()

    db_images = []
    extractor = None

    for media_type, media_files in media.items():
        if media_files:
            if extractor is not None:
                raise Exception('Combined data types are not supported')
            source_paths = [os.path.join(upload_dir, f) for f in media_files]
            if media_type in {'archive', 'zip'} and db_data.storage == StorageChoice.SHARE:
                source_paths.append(db_data.get_upload_dirname())
                upload_dir = db_data.get_upload_dirname()
                db_data.storage = StorageChoice.LOCAL
            extractor = MEDIA_TYPES[media_type]['extractor'](
                source_path=source_paths,
                step=db_data.get_frame_step(),
                start=db_data.start_frame,
                stop=data['stop_frame'],
            )

    validate_dimension = ValidateDimension()
    if extractor.__class__ == MEDIA_TYPES['zip']['extractor']:
        extractor.extract()
        validate_dimension.set_path(os.path.split(extractor.get_zip_filename())[0])
        validate_dimension.validate()
        if validate_dimension.dimension == DimensionType.DIM_3D:
            db_task.dimension = DimensionType.DIM_3D
            extractor.reconcile(
                source_files=list(validate_dimension.related_files.keys()),
                step=db_data.get_frame_step(),
                start=db_data.start_frame,
                stop=data['stop_frame'],
                dimension=DimensionType.DIM_3D,
            )
            extractor.add_files(validate_dimension.converted_files)

    db_task.mode = task_mode
    db_data.compressed_chunk_type = models.DataChoice.VIDEO if task_mode == 'interpolation' and not data['use_zip_chunks'] else models.DataChoice.IMAGESET
    db_data.original_chunk_type = models.DataChoice.VIDEO if task_mode == 'interpolation' else models.DataChoice.IMAGESET

    def update_progress(progress):
        progress_animation = '|/-\\'
        if not hasattr(update_progress, 'call_counter'):
            update_progress.call_counter = 0

        status_template = 'Images are being compressed {}'
        if progress:
            current_progress = '{}%'.format(round(progress * 100))
        else:
            current_progress = '{}'.format(progress_animation[update_progress.call_counter])
        job.meta['status'] = status_template.format(current_progress)
        job.save_meta()
        update_progress.call_counter = (update_progress.call_counter + 1) % len(progress_animation)

    compressed_chunk_writer_class = Mpeg4CompressedChunkWriter if db_data.compressed_chunk_type == DataChoice.VIDEO else ZipCompressedChunkWriter
    if db_data.original_chunk_type == DataChoice.VIDEO:
        original_chunk_writer_class = Mpeg4ChunkWriter
        # Let's use QP=17 (that is 67 for 0-100 range) for the original chunks,
        # which should be visually lossless or nearly so.
        # A lower value will significantly increase the chunk size
        # with a slight increase of quality.
        original_quality = 67
    else:
        original_chunk_writer_class = ZipChunkWriter
        original_quality = 100

    kwargs = {}
    if validate_dimension.dimension == DimensionType.DIM_3D:
        kwargs["dimension"] = validate_dimension.dimension
    compressed_chunk_writer = compressed_chunk_writer_class(db_data.image_quality, **kwargs)
    original_chunk_writer = original_chunk_writer_class(original_quality)

    # calculate chunk size if it isn't specified
    if db_data.chunk_size is None:
        if isinstance(compressed_chunk_writer, ZipCompressedChunkWriter):
            w, h = extractor.get_image_size(0)
            area = h * w
            db_data.chunk_size = max(2, min(72, 36 * 1920 * 1080 // area))
        else:
            db_data.chunk_size = 36

    video_path = ""
    video_size = (0, 0)

    def _update_status(msg):
        job.meta['status'] = msg
        job.save_meta()

    if settings.USE_CACHE and db_data.storage_method == StorageMethodChoice.CACHE:
        for media_type, media_files in media.items():
            if not media_files:
                continue

            # replace manifest file (e.g was uploaded 'subdir/manifest.jsonl')
            if manifest_file and not os.path.exists(db_data.get_manifest_path()):
                shutil.copyfile(os.path.join(upload_dir, manifest_file[0]),
                    db_data.get_manifest_path())
                if upload_dir != settings.SHARE_ROOT:
                    os.remove(os.path.join(upload_dir, manifest_file[0]))

            if task_mode == MEDIA_TYPES['video']['mode']:
                try:
                    manifest_is_prepared = False
                    if manifest_file:
                        try:
                            manifest = VideoManifestValidator(source_path=os.path.join(upload_dir, media_files[0]),
                                manifest_path=db_data.get_manifest_path())
                            manifest.init_index()
                            manifest.validate_seek_key_frames()
                            manifest.validate_frame_numbers()
                            assert len(manifest) > 0, 'No key frames.'

                            all_frames = manifest['properties']['length']
                            video_size = manifest['properties']['resolution']
                            manifest_is_prepared = True
                        except Exception as ex:
                            if os.path.exists(db_data.get_index_path()):
                                os.remove(db_data.get_index_path())
                            if isinstance(ex, AssertionError):
                                base_msg = str(ex)
                            else:
                                base_msg = 'Invalid manifest file was upload.'
                                slogger.glob.warning(str(ex))
                            _update_status('{} Start prepare a valid manifest file.'.format(base_msg))

                    if not manifest_is_prepared:
                        _update_status('Start prepare a manifest file')
                        manifest = VideoManifestManager(db_data.get_manifest_path())
                        meta_info = manifest.prepare_meta(
                            media_file=media_files[0],
                            upload_dir=upload_dir,
                            chunk_size=db_data.chunk_size
                        )
                        manifest.create(meta_info)
                        manifest.init_index()
                        _update_status('A manifest had been created')

                        all_frames = meta_info.get_size()
                        video_size = meta_info.frame_sizes
                        manifest_is_prepared = True

                    db_data.size = len(range(db_data.start_frame, min(data['stop_frame'] + 1 \
                        if data['stop_frame'] else all_frames, all_frames), db_data.get_frame_step()))
                    video_path = os.path.join(upload_dir, media_files[0])
                except Exception as ex:
                    db_data.storage_method = StorageMethodChoice.FILE_SYSTEM
                    if os.path.exists(db_data.get_manifest_path()):
                        os.remove(db_data.get_manifest_path())
                    if os.path.exists(db_data.get_index_path()):
                        os.remove(db_data.get_index_path())
                    base_msg = str(ex) if isinstance(ex, AssertionError) \
                        else "Uploaded video does not support a quick way of task creating."
                    _update_status("{} The task will be created using the old method".format(base_msg))
            else:  # images, archive, pdf
                db_data.size = len(extractor)

                manifest = ImageManifestManager(db_data.get_manifest_path())
                if not manifest_file:
                    if db_task.dimension == DimensionType.DIM_2D:
                        meta_info = manifest.prepare_meta(
                            sources=extractor.absolute_source_paths,
                            data_dir=upload_dir
                        )
                        content = meta_info.content
                    else:
                        content = []
                        for source in extractor.absolute_source_paths:
                            name, ext = os.path.splitext(os.path.relpath(source, upload_dir))
                            content.append({
                                'name': name,
                                'extension': ext
                            })
                    manifest.create(content)
                manifest.init_index()
                counter = itertools.count()
                for _, chunk_frames in itertools.groupby(extractor.frame_range, lambda x: next(counter) // db_data.chunk_size):
                    chunk_paths = [(extractor.get_path(i), i) for i in chunk_frames]
                    img_sizes = []

                    for _, frame_id in chunk_paths:
                        properties = manifest[frame_id]
                        if db_task.dimension == DimensionType.DIM_2D:
                            resolution = (properties['width'], properties['height'])
                        else:
                            resolution = extractor.get_image_size(frame_id)
                        img_sizes.append(resolution)

                    db_images.extend([
                        models.Image(data=db_data,
                            path=os.path.relpath(path, upload_dir),
                            frame=frame, width=w, height=h)
                        for (path, frame), (w, h) in zip(chunk_paths, img_sizes)
                    ])

    if db_data.storage_method == StorageMethodChoice.FILE_SYSTEM or not settings.USE_CACHE:
        counter = itertools.count()
        generator = itertools.groupby(extractor, lambda x: next(counter) // db_data.chunk_size)
        for chunk_idx, chunk_data in generator:
            chunk_data = list(chunk_data)
            original_chunk_path = db_data.get_original_chunk_path(chunk_idx)
            original_chunk_writer.save_as_chunk(chunk_data, original_chunk_path)

            compressed_chunk_path = db_data.get_compressed_chunk_path(chunk_idx)
            img_sizes = compressed_chunk_writer.save_as_chunk(chunk_data, compressed_chunk_path)

            if db_task.mode == 'annotation':
                db_images.extend([
                    models.Image(
                        data=db_data,
                        path=os.path.relpath(data[1], upload_dir),
                        frame=data[2],
                        width=size[0],
                        height=size[1])
                    for data, size in zip(chunk_data, img_sizes)
                ])
            else:
                video_size = img_sizes[0]
                video_path = chunk_data[0][1]

            db_data.size += len(chunk_data)
            progress = extractor.get_progress(chunk_data[-1][2])
            update_progress(progress)

    if db_task.mode == 'annotation':
        if validate_dimension.dimension == DimensionType.DIM_2D:
            models.Image.objects.bulk_create(db_images)
        else:
            related_file = []
            for image_data in db_images:
                image_model = models.Image(
                    data=image_data.data,
                    path=image_data.path,
                    frame=image_data.frame,
                    width=image_data.width,
                    height=image_data.height
                )
                image_model.save()
                image_data = models.Image.objects.get(id=image_model.id)
                if validate_dimension.related_files.get(image_data.path, None):
                    for related_image_file in validate_dimension.related_files[image_data.path]:
                        related_file.append(
                            RelatedFile(data=db_data, primary_image_id=image_data.id, path=related_image_file))
            RelatedFile.objects.bulk_create(related_file)
        db_images = []
    else:
        models.Video.objects.create(
            data=db_data,
            path=os.path.relpath(video_path, upload_dir),
            width=video_size[0], height=video_size[1])

    if db_data.stop_frame == 0:
        db_data.stop_frame = db_data.start_frame + (db_data.size - 1) * db_data.get_frame_step()
    else:
        # validate stop_frame
        db_data.stop_frame = min(db_data.stop_frame, \
            db_data.start_frame + (db_data.size - 1) * db_data.get_frame_step())

    preview = extractor.get_preview()
    preview.save(db_data.get_preview_path())

    slogger.glob.info("Found frames {} for Data #{}".format(db_data.size, db_data.id))
    _save_task_to_db(db_task)
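
# --- Illustrative sketch, not part of the original source ---
# The chunk-size heuristic above targets roughly 36 FullHD frames per chunk and
# scales that count by the actual frame area, clamped to the range [2, 72].
# A minimal standalone version of the same formula; the helper name
# _estimate_chunk_size is hypothetical.
def _estimate_chunk_size(width, height):
    """Return a default chunk size for frames of the given resolution."""
    area = width * height
    return max(2, min(72, 36 * 1920 * 1080 // area))

# Example: 4K frames get proportionally smaller chunks than FullHD ones.
assert _estimate_chunk_size(1920, 1080) == 36
assert _estimate_chunk_size(3840, 2160) == 9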
def setUpClass(cls):
    super().setUpClass()
    cls.format_names = ["Sly Point Cloud Format 1.0", "Kitti Raw Format 1.0"]
    cls._image_sizes = {}
    cls.pointcloud_pcd_filename = "test_canvas3d.zip"
    cls.pointcloud_pcd_path = osp.join(os.path.dirname(__file__), 'assets', cls.pointcloud_pcd_filename)
    image_sizes = []
    zip_file = zipfile.ZipFile(cls.pointcloud_pcd_path)
    for info in zip_file.namelist():
        if info.rsplit(".", maxsplit=1)[-1] == "pcd":
            with zip_file.open(info, "r") as file:
                data = ValidateDimension.get_pcd_properties(file)
                image_sizes.append((int(data["WIDTH"]), int(data["HEIGHT"])))
    cls.task = {
        "name": "main task",
        "owner_id": 2,
        "assignee_id": 2,
        "overlap": 0,
        "segment_size": 100,
        "labels": [
            {"name": "car"},
            {"name": "person"},
        ]
    }
    cls.task_with_attributes = {
        "name": "task with attributes",
        "owner_id": 2,
        "assignee_id": 2,
        "overlap": 0,
        "segment_size": 100,
        "labels": [
            {
                "name": "car",
                "color": "#2080c0",
                "attributes": [
                    {
                        "name": "radio_name",
                        "mutable": False,
                        "input_type": "radio",
                        "default_value": "x1",
                        "values": ["x1", "x2", "x3"]
                    },
                    {
                        "name": "check_name",
                        "mutable": True,
                        "input_type": "checkbox",
                        "default_value": "false",
                        "values": ["false"]
                    },
                    {
                        "name": "text_name",
                        "mutable": False,
                        "input_type": "text",
                        "default_value": "qwerty",
                        "values": ["qwerty"]
                    },
                    {
                        "name": "number_name",
                        "mutable": False,
                        "input_type": "number",
                        "default_value": "-4.0",
                        "values": ["-4", "4", "1"]
                    }
                ]
            },
            {
                "name": "person",
                "color": "#c06060",
                "attributes": []
            },
        ]
    }
    cls.task_many_jobs = {
        "name": "task several jobs",
        "owner_id": 2,
        "assignee_id": 2,
        "overlap": 3,
        "segment_size": 1,
        "labels": [{
            "name": "car",
            "color": "#c06060",
            "id": 1,
            "attributes": []
        }]
    }
    cls.cuboid_example = {
        "version": 0,
        "tags": [],
        "shapes": [
            {
                "type": "cuboid",
                "occluded": False,
                "z_order": 0,
                "points": [0.16, 0.20, -0.26, 0, -0.14, 0, 4.84, 4.48, 4.12, 0, 0, 0, 0, 0, 0, 0],
                "rotation": 0,
                "frame": 0,
                "label_id": None,
                "group": 0,
                "source": "manual",
                "attributes": []
            },
        ],
        "tracks": []
    }
    cls._image_sizes[cls.pointcloud_pcd_filename] = image_sizes
    cls.expected_action = {
        cls.admin: {'name': 'admin', 'code': status.HTTP_200_OK, 'annotation_changed': True},
        cls.user: {'name': 'user', 'code': status.HTTP_200_OK, 'annotation_changed': True},
        None: {'name': 'none', 'code': status.HTTP_401_UNAUTHORIZED, 'annotation_changed': False},
    }
    cls.expected_dump_upload = {
        cls.admin: {'name': 'admin', 'code': status.HTTP_200_OK, 'create code': status.HTTP_201_CREATED,
            'accept code': status.HTTP_202_ACCEPTED, 'file_exists': True, 'annotation_loaded': True},
        cls.user: {'name': 'user', 'code': status.HTTP_200_OK, 'create code': status.HTTP_201_CREATED,
            'accept code': status.HTTP_202_ACCEPTED, 'file_exists': True, 'annotation_loaded': True},
        None: {'name': 'none', 'code': status.HTTP_401_UNAUTHORIZED, 'create code': status.HTTP_401_UNAUTHORIZED,
            'accept code': status.HTTP_401_UNAUTHORIZED, 'file_exists': False, 'annotation_loaded': False},
    }
def _create_thread(tid, data, isImport=False):
    slogger.glob.info("create task #{}".format(tid))

    db_task = models.Task.objects.select_for_update().get(pk=tid)
    db_data = db_task.data
    upload_dir = db_data.get_upload_dirname()

    if data['remote_files']:
        if db_data.storage != models.StorageChoice.CLOUD_STORAGE:
            data['remote_files'] = _download_data(data['remote_files'], upload_dir)

    manifest_file = []
    media = _count_files(data, manifest_file)
    media, task_mode = _validate_data(media, manifest_file)

    if manifest_file:
        assert settings.USE_CACHE and db_data.storage_method == models.StorageMethodChoice.CACHE, \
            "File with meta information can be uploaded if 'Use cache' option is also selected"

    if data['server_files']:
        if db_data.storage == models.StorageChoice.LOCAL:
            _copy_data_from_share(data['server_files'], upload_dir)
        elif db_data.storage == models.StorageChoice.SHARE:
            upload_dir = settings.SHARE_ROOT
        else:  # cloud storage
            if not manifest_file:
                raise Exception('A manifest file not found')
            db_cloud_storage = db_data.cloud_storage
            credentials = Credentials()
            credentials.convert_from_db({
                'type': db_cloud_storage.credentials_type,
                'value': db_cloud_storage.credentials,
            })
            details = {
                'resource': db_cloud_storage.resource,
                'credentials': credentials,
                'specific_attributes': db_cloud_storage.get_specific_attributes()
            }
            cloud_storage_instance = get_cloud_storage_instance(cloud_provider=db_cloud_storage.provider_type, **details)
            first_sorted_media_image = sorted(media['image'])[0]
            cloud_storage_instance.download_file(first_sorted_media_image, os.path.join(upload_dir, first_sorted_media_image))

            # prepare task manifest file from cloud storage manifest file
            manifest = ImageManifestManager(db_data.get_manifest_path())
            cloud_storage_manifest = ImageManifestManager(
                os.path.join(db_data.cloud_storage.get_storage_dirname(), manifest_file[0])
            )
            cloud_storage_manifest.set_index()
            media_files = sorted(media['image'])
            content = cloud_storage_manifest.get_subset(media_files)
            manifest.create(content)
            manifest.init_index()

    av_scan_paths(upload_dir)

    job = rq.get_current_job()
    job.meta['status'] = 'Media files are being extracted...'
    job.save_meta()

    db_images = []
    extractor = None
    manifest_index = _get_manifest_frame_indexer()

    # If upload from server_files image and directories
    # need to update images list by all found images in directories
    if (data['server_files']) and len(media['directory']) and len(media['image']):
        media['image'].extend(
            [os.path.relpath(image, upload_dir) for image in
                MEDIA_TYPES['directory']['extractor'](
                    source_path=[os.path.join(upload_dir, f) for f in media['directory']],
                ).absolute_source_paths
            ]
        )
        media['directory'] = []

    for media_type, media_files in media.items():
        if media_files:
            if extractor is not None:
                raise Exception('Combined data types are not supported')
            source_paths = [os.path.join(upload_dir, f) for f in media_files]
            if media_type in {'archive', 'zip'} and db_data.storage == models.StorageChoice.SHARE:
                source_paths.append(db_data.get_upload_dirname())
                upload_dir = db_data.get_upload_dirname()
                db_data.storage = models.StorageChoice.LOCAL
            if isImport and media_type == 'image' and db_data.storage == models.StorageChoice.SHARE:
                manifest_index = _get_manifest_frame_indexer(db_data.start_frame, db_data.get_frame_step())
                db_data.start_frame = 0
                data['stop_frame'] = None
                db_data.frame_filter = ''
            extractor = MEDIA_TYPES[media_type]['extractor'](
                source_path=source_paths,
                step=db_data.get_frame_step(),
                start=db_data.start_frame,
                stop=data['stop_frame'],
            )

    validate_dimension = ValidateDimension()
    if isinstance(extractor, MEDIA_TYPES['zip']['extractor']):
        extractor.extract()

    if db_data.storage == models.StorageChoice.LOCAL or \
            (db_data.storage == models.StorageChoice.SHARE and \
            isinstance(extractor, MEDIA_TYPES['zip']['extractor'])):
        validate_dimension.set_path(upload_dir)
        validate_dimension.validate()

    if db_task.project is not None and db_task.project.tasks.count() > 1 and \
            db_task.project.tasks.first().dimension != validate_dimension.dimension:
        raise Exception(f'Dimension ({validate_dimension.dimension}) of the task must be the same as other tasks in project ({db_task.project.tasks.first().dimension})')

    if validate_dimension.dimension == models.DimensionType.DIM_3D:
        db_task.dimension = models.DimensionType.DIM_3D

        extractor.reconcile(
            source_files=[os.path.join(upload_dir, f) for f in validate_dimension.related_files.keys()],
            step=db_data.get_frame_step(),
            start=db_data.start_frame,
            stop=data['stop_frame'],
            dimension=models.DimensionType.DIM_3D,
        )

    related_images = {}
    if isinstance(extractor, MEDIA_TYPES['image']['extractor']):
        extractor.filter(lambda x: not re.search(r'(^|{0})related_images{0}'.format(os.sep), x))
        related_images = detect_related_images(extractor.absolute_source_paths, upload_dir)

    db_task.mode = task_mode
    db_data.compressed_chunk_type = models.DataChoice.VIDEO if task_mode == 'interpolation' and not data['use_zip_chunks'] else models.DataChoice.IMAGESET
    db_data.original_chunk_type = models.DataChoice.VIDEO if task_mode == 'interpolation' else models.DataChoice.IMAGESET

    def update_progress(progress):
        progress_animation = '|/-\\'
        if not hasattr(update_progress, 'call_counter'):
            update_progress.call_counter = 0

        status_template = 'Images are being compressed {}'
        if progress:
            current_progress = '{}%'.format(round(progress * 100))
        else:
            current_progress = '{}'.format(progress_animation[update_progress.call_counter])
        job.meta['status'] = status_template.format(current_progress)
        job.save_meta()
        update_progress.call_counter = (update_progress.call_counter + 1) % len(progress_animation)

    compressed_chunk_writer_class = Mpeg4CompressedChunkWriter if db_data.compressed_chunk_type == models.DataChoice.VIDEO else ZipCompressedChunkWriter
    if db_data.original_chunk_type == models.DataChoice.VIDEO:
        original_chunk_writer_class = Mpeg4ChunkWriter
        # Let's use QP=17 (that is 67 for 0-100 range) for the original chunks,
        # which should be visually lossless or nearly so.
        # A lower value will significantly increase the chunk size
        # with a slight increase of quality.
        original_quality = 67
    else:
        original_chunk_writer_class = ZipChunkWriter
        original_quality = 100

    kwargs = {}
    if validate_dimension.dimension == models.DimensionType.DIM_3D:
        kwargs["dimension"] = validate_dimension.dimension
    compressed_chunk_writer = compressed_chunk_writer_class(db_data.image_quality, **kwargs)
    original_chunk_writer = original_chunk_writer_class(original_quality)

    # calculate chunk size if it isn't specified
    if db_data.chunk_size is None:
        if isinstance(compressed_chunk_writer, ZipCompressedChunkWriter):
            if not (db_data.storage == models.StorageChoice.CLOUD_STORAGE):
                w, h = extractor.get_image_size(0)
            else:
                img_properties = manifest[0]
                w, h = img_properties['width'], img_properties['height']
            area = h * w
            db_data.chunk_size = max(2, min(72, 36 * 1920 * 1080 // area))
        else:
            db_data.chunk_size = 36

    video_path = ""
    video_size = (0, 0)

    def _update_status(msg):
        job.meta['status'] = msg
        job.save_meta()

    if settings.USE_CACHE and db_data.storage_method == models.StorageMethodChoice.CACHE:
        for media_type, media_files in media.items():
            if not media_files:
                continue

            # replace manifest file (e.g was uploaded 'subdir/manifest.jsonl')
            if manifest_file and not os.path.exists(db_data.get_manifest_path()):
                shutil.copyfile(os.path.join(upload_dir, manifest_file[0]),
                    db_data.get_manifest_path())
                if upload_dir != settings.SHARE_ROOT:
                    os.remove(os.path.join(upload_dir, manifest_file[0]))

            if task_mode == MEDIA_TYPES['video']['mode']:
                try:
                    manifest_is_prepared = False
                    if manifest_file:
                        try:
                            manifest = VideoManifestValidator(source_path=os.path.join(upload_dir, media_files[0]),
                                manifest_path=db_data.get_manifest_path())
                            manifest.init_index()
                            manifest.validate_seek_key_frames()
                            manifest.validate_frame_numbers()
                            assert len(manifest) > 0, 'No key frames.'

                            all_frames = manifest.video_length
                            video_size = manifest.video_resolution
                            manifest_is_prepared = True
                        except Exception as ex:
                            if os.path.exists(db_data.get_index_path()):
                                os.remove(db_data.get_index_path())
                            if isinstance(ex, AssertionError):
                                base_msg = str(ex)
                            else:
                                base_msg = 'Invalid manifest file was upload.'
                                slogger.glob.warning(str(ex))
                            _update_status('{} Start prepare a valid manifest file.'.format(base_msg))

                    if not manifest_is_prepared:
                        _update_status('Start prepare a manifest file')
                        manifest = VideoManifestManager(db_data.get_manifest_path())
                        meta_info = manifest.prepare_meta(
                            media_file=media_files[0],
                            upload_dir=upload_dir,
                            chunk_size=db_data.chunk_size
                        )
                        manifest.create(meta_info)
                        manifest.init_index()
                        _update_status('A manifest had been created')

                        all_frames = meta_info.get_size()
                        video_size = meta_info.frame_sizes
                        manifest_is_prepared = True

                    db_data.size = len(range(db_data.start_frame, min(data['stop_frame'] + 1 \
                        if data['stop_frame'] else all_frames, all_frames), db_data.get_frame_step()))
                    video_path = os.path.join(upload_dir, media_files[0])
                except Exception as ex:
                    db_data.storage_method = models.StorageMethodChoice.FILE_SYSTEM
                    if os.path.exists(db_data.get_manifest_path()):
                        os.remove(db_data.get_manifest_path())
                    if os.path.exists(db_data.get_index_path()):
                        os.remove(db_data.get_index_path())
                    base_msg = str(ex) if isinstance(ex, AssertionError) \
                        else "Uploaded video does not support a quick way of task creating."
                    _update_status("{} The task will be created using the old method".format(base_msg))
            else:  # images, archive, pdf
                db_data.size = len(extractor)
                manifest = ImageManifestManager(db_data.get_manifest_path())
                if not manifest_file:
                    if db_task.dimension == models.DimensionType.DIM_2D:
                        meta_info = manifest.prepare_meta(
                            sources=extractor.absolute_source_paths,
                            meta={k: {'related_images': related_images[k]} for k in related_images},
                            data_dir=upload_dir
                        )
                        content = meta_info.content
                    else:
                        content = []
                        for source in extractor.absolute_source_paths:
                            name, ext = os.path.splitext(os.path.relpath(source, upload_dir))
                            content.append({
                                'name': name,
                                'meta': {'related_images': related_images[''.join((name, ext))]},
                                'extension': ext
                            })
                    manifest.create(content)
                manifest.init_index()
                counter = itertools.count()
                for _, chunk_frames in itertools.groupby(extractor.frame_range, lambda x: next(counter) // db_data.chunk_size):
                    chunk_paths = [(extractor.get_path(i), i) for i in chunk_frames]
                    img_sizes = []

                    for _, frame_id in chunk_paths:
                        properties = manifest[manifest_index(frame_id)]
                        if db_task.dimension == models.DimensionType.DIM_2D:
                            resolution = (properties['width'], properties['height'])
                        else:
                            resolution = extractor.get_image_size(frame_id)
                        img_sizes.append(resolution)

                    db_images.extend([
                        models.Image(data=db_data,
                            path=os.path.relpath(path, upload_dir),
                            frame=frame, width=w, height=h)
                        for (path, frame), (w, h) in zip(chunk_paths, img_sizes)
                    ])

    if db_data.storage_method == models.StorageMethodChoice.FILE_SYSTEM or not settings.USE_CACHE:
        counter = itertools.count()
        generator = itertools.groupby(extractor, lambda x: next(counter) // db_data.chunk_size)
        for chunk_idx, chunk_data in generator:
            chunk_data = list(chunk_data)
            original_chunk_path = db_data.get_original_chunk_path(chunk_idx)
            original_chunk_writer.save_as_chunk(chunk_data, original_chunk_path)

            compressed_chunk_path = db_data.get_compressed_chunk_path(chunk_idx)
            img_sizes = compressed_chunk_writer.save_as_chunk(chunk_data, compressed_chunk_path)

            if db_task.mode == 'annotation':
                db_images.extend([
                    models.Image(
                        data=db_data,
                        path=os.path.relpath(data[1], upload_dir),
                        frame=data[2],
                        width=size[0],
                        height=size[1])
                    for data, size in zip(chunk_data, img_sizes)
                ])
            else:
                video_size = img_sizes[0]
                video_path = chunk_data[0][1]

            db_data.size += len(chunk_data)
            progress = extractor.get_progress(chunk_data[-1][2])
            update_progress(progress)

    if db_task.mode == 'annotation':
        models.Image.objects.bulk_create(db_images)
        created_images = models.Image.objects.filter(data_id=db_data.id)

        db_related_files = [
            models.RelatedFile(data=image.data, primary_image=image, path=os.path.join(upload_dir, related_file_path))
            for image in created_images
            for related_file_path in related_images.get(image.path, [])
        ]
        models.RelatedFile.objects.bulk_create(db_related_files)
        db_images = []
    else:
        models.Video.objects.create(
            data=db_data,
            path=os.path.relpath(video_path, upload_dir),
            width=video_size[0], height=video_size[1])

    if db_data.stop_frame == 0:
        db_data.stop_frame = db_data.start_frame + (db_data.size - 1) * db_data.get_frame_step()
    else:
        # validate stop_frame
        db_data.stop_frame = min(db_data.stop_frame, \
            db_data.start_frame + (db_data.size - 1) * db_data.get_frame_step())

    preview = extractor.get_preview()
    preview.save(db_data.get_preview_path())

    slogger.glob.info("Found frames {} for Data #{}".format(db_data.size, db_data.id))
    _save_task_to_db(db_task)
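
# --- Illustrative sketch, not part of the original source ---
# _get_manifest_frame_indexer is not shown in this listing. Based on how it is used
# above (manifest[manifest_index(frame_id)], and re-created with db_data.start_frame
# and db_data.get_frame_step() when importing), a plausible implementation maps an
# extractor frame index back to its position in the manifest. The real signature and
# behaviour may differ.
def _get_manifest_frame_indexer(start_frame=0, frame_step=1):
    # hypothetical helper: frame 0 of the extractor corresponds to manifest entry
    # start_frame, and each subsequent frame advances by frame_step
    return lambda frame_id: start_frame + frame_id * frame_step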
def _create_thread(db_task, data, isBackupRestore=False, isDatasetImport=False):
    if isinstance(db_task, int):
        db_task = models.Task.objects.select_for_update().get(pk=db_task)

    slogger.glob.info("create task #{}".format(db_task.id))

    db_data = db_task.data
    upload_dir = db_data.get_upload_dirname()

    if data['remote_files'] and not isDatasetImport:
        data['remote_files'] = _download_data(data['remote_files'], upload_dir)

    manifest_files = []
    media = _count_files(data, manifest_files)
    media, task_mode = _validate_data(media, manifest_files)

    if data['server_files']:
        if db_data.storage == models.StorageChoice.LOCAL:
            _copy_data_from_source(data['server_files'], upload_dir, data.get('server_files_path'))
        elif db_data.storage == models.StorageChoice.SHARE:
            upload_dir = settings.SHARE_ROOT

    manifest_root = None
    if db_data.storage in {models.StorageChoice.LOCAL, models.StorageChoice.SHARE}:
        manifest_root = upload_dir
    elif db_data.storage == models.StorageChoice.CLOUD_STORAGE:
        manifest_root = db_data.cloud_storage.get_storage_dirname()
    manifest_file = _validate_manifest(manifest_files, manifest_root)
    if manifest_file and (not settings.USE_CACHE or db_data.storage_method != models.StorageMethodChoice.CACHE):
        raise Exception(
            "File with meta information can be uploaded if 'Use cache' option is also selected"
        )

    if data['server_files'] and db_data.storage == models.StorageChoice.CLOUD_STORAGE:
        if not manifest_file:
            raise Exception('A manifest file not found')

        db_cloud_storage = db_data.cloud_storage
        credentials = Credentials()
        credentials.convert_from_db({
            'type': db_cloud_storage.credentials_type,
            'value': db_cloud_storage.credentials,
        })
        details = {
            'resource': db_cloud_storage.resource,
            'credentials': credentials,
            'specific_attributes': db_cloud_storage.get_specific_attributes()
        }
        cloud_storage_instance = get_cloud_storage_instance(
            cloud_provider=db_cloud_storage.provider_type, **details)
        sorted_media = sort(media['image'], data['sorting_method'])
        first_sorted_media_image = sorted_media[0]
        cloud_storage_instance.download_file(
            first_sorted_media_image, os.path.join(upload_dir, first_sorted_media_image))

        # prepare task manifest file from cloud storage manifest file
        # NOTE we should create manifest before defining chunk_size
        # FIXME in the future when will be implemented archive support
        manifest = ImageManifestManager(db_data.get_manifest_path())
        cloud_storage_manifest = ImageManifestManager(
            os.path.join(db_data.cloud_storage.get_storage_dirname(), manifest_file),
            db_data.cloud_storage.get_storage_dirname())
        cloud_storage_manifest.set_index()
        sequence, content = cloud_storage_manifest.get_subset(sorted_media)
        sorted_content = (i[1] for i in sorted(zip(sequence, content)))
        manifest.create(sorted_content)

    av_scan_paths(upload_dir)

    job = rq.get_current_job()
    job.meta['status'] = 'Media files are being extracted...'
    job.save_meta()

    db_images = []
    extractor = None
    manifest_index = _get_manifest_frame_indexer()

    # If upload from server_files image and directories
    # need to update images list by all found images in directories
    if (data['server_files']) and len(media['directory']) and len(media['image']):
        media['image'].extend(
            [os.path.relpath(image, upload_dir) for image in
                MEDIA_TYPES['directory']['extractor'](
                    source_path=[os.path.join(upload_dir, f) for f in media['directory']],
                ).absolute_source_paths
            ]
        )
        media['directory'] = []

    for media_type, media_files in media.items():
        if media_files:
            if extractor is not None:
                raise Exception('Combined data types are not supported')

            if (isDatasetImport or isBackupRestore) and media_type == 'image' and db_data.storage == models.StorageChoice.SHARE:
                manifest_index = _get_manifest_frame_indexer(db_data.start_frame, db_data.get_frame_step())
                db_data.start_frame = 0
                data['stop_frame'] = None
                db_data.frame_filter = ''

            source_paths = [os.path.join(upload_dir, f) for f in media_files]
            if manifest_file and not isBackupRestore and \
                    data['sorting_method'] in {models.SortingMethod.RANDOM, models.SortingMethod.PREDEFINED}:
                raise Exception("It isn't supported to upload manifest file and use random sorting")
            if isBackupRestore and db_data.storage_method == models.StorageMethodChoice.FILE_SYSTEM and \
                    data['sorting_method'] in {models.SortingMethod.RANDOM, models.SortingMethod.PREDEFINED}:
                raise Exception("It isn't supported to import the task that was created without cache but with random/predefined sorting")

            details = {
                'source_path': source_paths,
                'step': db_data.get_frame_step(),
                'start': db_data.start_frame,
                'stop': data['stop_frame'],
            }
            if media_type in {'archive', 'zip', 'pdf'} and db_data.storage == models.StorageChoice.SHARE:
                details['extract_dir'] = db_data.get_upload_dirname()
                upload_dir = db_data.get_upload_dirname()
                db_data.storage = models.StorageChoice.LOCAL
            if media_type != 'video':
                details['sorting_method'] = data['sorting_method']
            extractor = MEDIA_TYPES[media_type]['extractor'](**details)

    validate_dimension = ValidateDimension()
    if isinstance(extractor, MEDIA_TYPES['zip']['extractor']):
        extractor.extract()

    if db_data.storage == models.StorageChoice.LOCAL or \
            (db_data.storage == models.StorageChoice.SHARE and \
            isinstance(extractor, MEDIA_TYPES['zip']['extractor'])):
        validate_dimension.set_path(upload_dir)
        validate_dimension.validate()

    if db_task.project is not None and db_task.project.tasks.count() > 1 and \
            db_task.project.tasks.first().dimension != validate_dimension.dimension:
        raise Exception(f'Dimension ({validate_dimension.dimension}) of the task must be the same as other tasks in project ({db_task.project.tasks.first().dimension})')

    if validate_dimension.dimension == models.DimensionType.DIM_3D:
        db_task.dimension = models.DimensionType.DIM_3D

        keys_of_related_files = validate_dimension.related_files.keys()
        absolute_keys_of_related_files = [os.path.join(upload_dir, f) for f in keys_of_related_files]
        # When a task is created, the sorting method can be random and in this case,
        # reinitialization will be with correct sorting, but when a task is restored from
        # a backup, a random sorting is changed to predefined and we need to manually sort
        # files in the correct order.
        source_files = absolute_keys_of_related_files if not isBackupRestore else \
            [item for item in extractor.absolute_source_paths if item in absolute_keys_of_related_files]
        extractor.reconcile(
            source_files=source_files,
            step=db_data.get_frame_step(),
            start=db_data.start_frame,
            stop=data['stop_frame'],
            dimension=models.DimensionType.DIM_3D,
        )

    related_images = {}
    if isinstance(extractor, MEDIA_TYPES['image']['extractor']):
        extractor.filter(lambda x: not re.search(r'(^|{0})related_images{0}'.format(os.sep), x))
        related_images = detect_related_images(extractor.absolute_source_paths, upload_dir)

    if isBackupRestore and not isinstance(extractor, MEDIA_TYPES['video']['extractor']) and \
            db_data.storage_method == models.StorageMethodChoice.CACHE and \
            db_data.sorting_method in {models.SortingMethod.RANDOM, models.SortingMethod.PREDEFINED} and \
            validate_dimension.dimension != models.DimensionType.DIM_3D:
        # we should sort media_files according to the manifest content sequence
        # and we should do this in general after validation step for 3D data
        # and after filtering from related_images
        manifest = ImageManifestManager(db_data.get_manifest_path())
        manifest.set_index()
        sorted_media_files = []

        for idx in range(len(extractor.absolute_source_paths)):
            properties = manifest[idx]
            image_name = properties.get('name', None)
            image_extension = properties.get('extension', None)

            full_image_path = os.path.join(upload_dir, f"{image_name}{image_extension}") \
                if image_name and image_extension else None
            if full_image_path and full_image_path in extractor:
                sorted_media_files.append(full_image_path)
        media_files = sorted_media_files.copy()
        del sorted_media_files
        data['sorting_method'] = models.SortingMethod.PREDEFINED
        extractor.reconcile(
            source_files=media_files,
            step=db_data.get_frame_step(),
            start=db_data.start_frame,
            stop=data['stop_frame'],
            sorting_method=data['sorting_method'],
        )

    db_task.mode = task_mode
    db_data.compressed_chunk_type = models.DataChoice.VIDEO if task_mode == 'interpolation' and not data['use_zip_chunks'] else models.DataChoice.IMAGESET
    db_data.original_chunk_type = models.DataChoice.VIDEO if task_mode == 'interpolation' else models.DataChoice.IMAGESET

    def update_progress(progress):
        progress_animation = '|/-\\'
        if not hasattr(update_progress, 'call_counter'):
            update_progress.call_counter = 0

        status_message = 'Images are being compressed'
        if not progress:
            status_message = '{} {}'.format(status_message, progress_animation[update_progress.call_counter])
        job.meta['status'] = status_message
        job.meta['task_progress'] = progress or 0.
        job.save_meta()
        update_progress.call_counter = (update_progress.call_counter + 1) % len(progress_animation)

    compressed_chunk_writer_class = Mpeg4CompressedChunkWriter if db_data.compressed_chunk_type == models.DataChoice.VIDEO else ZipCompressedChunkWriter
    if db_data.original_chunk_type == models.DataChoice.VIDEO:
        original_chunk_writer_class = Mpeg4ChunkWriter
        # Let's use QP=17 (that is 67 for 0-100 range) for the original chunks,
        # which should be visually lossless or nearly so.
        # A lower value will significantly increase the chunk size
        # with a slight increase of quality.
        original_quality = 67
    else:
        original_chunk_writer_class = ZipChunkWriter
        original_quality = 100

    kwargs = {}
    if validate_dimension.dimension == models.DimensionType.DIM_3D:
        kwargs["dimension"] = validate_dimension.dimension
    compressed_chunk_writer = compressed_chunk_writer_class(db_data.image_quality, **kwargs)
    original_chunk_writer = original_chunk_writer_class(original_quality)

    # calculate chunk size if it isn't specified
    if db_data.chunk_size is None:
        if isinstance(compressed_chunk_writer, ZipCompressedChunkWriter):
            if not (db_data.storage == models.StorageChoice.CLOUD_STORAGE):
                w, h = extractor.get_image_size(0)
            else:
                img_properties = manifest[0]
                w, h = img_properties['width'], img_properties['height']
            area = h * w
            db_data.chunk_size = max(2, min(72, 36 * 1920 * 1080 // area))
        else:
            db_data.chunk_size = 36

    video_path = ""
    video_size = (0, 0)

    def _update_status(msg):
        job.meta['status'] = msg
        job.save_meta()

    if settings.USE_CACHE and db_data.storage_method == models.StorageMethodChoice.CACHE:
        for media_type, media_files in media.items():
            if not media_files:
                continue

            # replace manifest file (e.g was uploaded 'subdir/manifest.jsonl' or 'some_manifest.jsonl')
            if manifest_file and not os.path.exists(db_data.get_manifest_path()):
                shutil.copyfile(os.path.join(upload_dir, manifest_file),
                    db_data.get_manifest_path())
                if upload_dir != settings.SHARE_ROOT:
                    os.remove(os.path.join(upload_dir, manifest_file))

            if task_mode == MEDIA_TYPES['video']['mode']:
                try:
                    manifest_is_prepared = False
                    if manifest_file:
                        try:
                            manifest = VideoManifestValidator(source_path=os.path.join(upload_dir, media_files[0]),
                                manifest_path=db_data.get_manifest_path())
                            manifest.init_index()
                            manifest.validate_seek_key_frames()
                            manifest.validate_frame_numbers()
                            assert len(manifest) > 0, 'No key frames.'

                            all_frames = manifest.video_length
                            video_size = manifest.video_resolution
                            manifest_is_prepared = True
                        except Exception as ex:
                            manifest.remove()
                            if isinstance(ex, AssertionError):
                                base_msg = str(ex)
                            else:
                                base_msg = 'Invalid manifest file was upload.'
                                slogger.glob.warning(str(ex))
                            _update_status('{} Start prepare a valid manifest file.'.format(base_msg))

                    if not manifest_is_prepared:
                        _update_status('Start prepare a manifest file')
                        manifest = VideoManifestManager(db_data.get_manifest_path())
                        manifest.link(media_file=media_files[0], upload_dir=upload_dir,
                            chunk_size=db_data.chunk_size)
                        manifest.create()
                        _update_status('A manifest had been created')

                        all_frames = len(manifest.reader)
                        video_size = manifest.reader.resolution
                        manifest_is_prepared = True

                    db_data.size = len(range(db_data.start_frame, min(data['stop_frame'] + 1 \
                        if data['stop_frame'] else all_frames, all_frames), db_data.get_frame_step()))
                    video_path = os.path.join(upload_dir, media_files[0])
                except Exception as ex:
                    db_data.storage_method = models.StorageMethodChoice.FILE_SYSTEM
                    manifest.remove()
                    del manifest
                    base_msg = str(ex) if isinstance(ex, AssertionError) \
                        else "Uploaded video does not support a quick way of task creating."
                    _update_status("{} The task will be created using the old method".format(base_msg))
            else:  # images, archive, pdf
                db_data.size = len(extractor)
                manifest = ImageManifestManager(db_data.get_manifest_path())
                if not manifest_file:
                    manifest.link(
                        sources=extractor.absolute_source_paths,
                        meta={k: {'related_images': related_images[k]} for k in related_images},
                        data_dir=upload_dir,
                        DIM_3D=(db_task.dimension == models.DimensionType.DIM_3D),
                    )
                    manifest.create()
                else:
                    manifest.init_index()
                counter = itertools.count()
                for _, chunk_frames in itertools.groupby(extractor.frame_range, lambda x: next(counter) // db_data.chunk_size):
                    chunk_paths = [(extractor.get_path(i), i) for i in chunk_frames]
                    img_sizes = []

                    for chunk_path, frame_id in chunk_paths:
                        properties = manifest[manifest_index(frame_id)]

                        # check mapping
                        if not chunk_path.endswith(f"{properties['name']}{properties['extension']}"):
                            raise Exception('Incorrect file mapping to manifest content')
                        if db_task.dimension == models.DimensionType.DIM_2D:
                            resolution = (properties['width'], properties['height'])
                        else:
                            resolution = extractor.get_image_size(frame_id)
                        img_sizes.append(resolution)

                    db_images.extend([
                        models.Image(data=db_data,
                            path=os.path.relpath(path, upload_dir),
                            frame=frame, width=w, height=h)
                        for (path, frame), (w, h) in zip(chunk_paths, img_sizes)
                    ])

    if db_data.storage_method == models.StorageMethodChoice.FILE_SYSTEM or not settings.USE_CACHE:
        counter = itertools.count()
        generator = itertools.groupby(extractor, lambda x: next(counter) // db_data.chunk_size)
        for chunk_idx, chunk_data in generator:
            chunk_data = list(chunk_data)
            original_chunk_path = db_data.get_original_chunk_path(chunk_idx)
            original_chunk_writer.save_as_chunk(chunk_data, original_chunk_path)

            compressed_chunk_path = db_data.get_compressed_chunk_path(chunk_idx)
            img_sizes = compressed_chunk_writer.save_as_chunk(chunk_data, compressed_chunk_path)

            if db_task.mode == 'annotation':
                db_images.extend([
                    models.Image(
                        data=db_data,
                        path=os.path.relpath(data[1], upload_dir),
                        frame=data[2],
                        width=size[0],
                        height=size[1])
                    for data, size in zip(chunk_data, img_sizes)
                ])
            else:
                video_size = img_sizes[0]
                video_path = chunk_data[0][1]

            db_data.size += len(chunk_data)
            progress = extractor.get_progress(chunk_data[-1][2])
            update_progress(progress)

    if db_task.mode == 'annotation':
        models.Image.objects.bulk_create(db_images)
        created_images = models.Image.objects.filter(data_id=db_data.id)

        db_related_files = [
            models.RelatedFile(data=image.data, primary_image=image, path=os.path.join(upload_dir, related_file_path))
            for image in created_images
            for related_file_path in related_images.get(image.path, [])
        ]
        models.RelatedFile.objects.bulk_create(db_related_files)
        db_images = []
    else:
        models.Video.objects.create(
            data=db_data,
            path=os.path.relpath(video_path, upload_dir),
            width=video_size[0], height=video_size[1])

    if db_data.stop_frame == 0:
        db_data.stop_frame = db_data.start_frame + (db_data.size - 1) * db_data.get_frame_step()
    else:
        # validate stop_frame
        db_data.stop_frame = min(db_data.stop_frame, \
            db_data.start_frame + (db_data.size - 1) * db_data.get_frame_step())

    preview = extractor.get_preview()
    preview.save(db_data.get_preview_path())

    slogger.glob.info("Found frames {} for Data #{}".format(db_data.size, db_data.id))
    _save_task_to_db(db_task)
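
# --- Illustrative sketch, not part of the original source ---
# The stop_frame handling above derives the last usable frame from the number of
# extracted frames and the frame step, and clamps a user-provided stop_frame so it
# never points past the data. For example, with start_frame=10, size=5 and step=3:
start_frame, size, frame_step = 10, 5, 3
computed_stop = start_frame + (size - 1) * frame_step
assert computed_stop == 22
# a user-provided stop_frame of 100 would be reduced to 22, while 16 would be kept
assert min(100, computed_stop) == 22
assert min(16, computed_stop) == 16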