def _import_annotations(request, rq_id, rq_func, pk, format_name):
    """Start, or report the state of, a background annotation import job.

    When no RQ job with id ``rq_id`` exists on the "default" queue, the
    uploaded annotation file is validated, copied into a temporary file,
    passed to ``av_scan_paths`` and ``rq_func`` is enqueued with
    ``(pk, filename, format_name)``. When the job already exists, its state
    is reported and, once it has finished or failed, the temporary file and
    the job are removed.

    Args:
        request: incoming request; ``request.data`` must carry the
            ``annotation_file`` field checked by ``AnnotationFileSerializer``.
        rq_id: id of the RQ job to look up or create.
        rq_func: callable enqueued as the job body.
        pk: primary key passed to ``rq_func``; also used in the temporary
            file name prefix.
        format_name: display name of one of the import formats returned by
            ``dm.views.get_import_formats()``.

    Returns:
        Response: 405 if the format is disabled, 201 when the job has
        finished, 500 (with the job's ``exc_info`` as data) when it failed,
        202 otherwise (job just queued or still pending).

    Raises:
        serializers.ValidationError: ``format_name`` is not a known import
            format. ``serializer.is_valid(raise_exception=True)`` may also
            raise for an invalid upload.
    """
    import_formats = {f.DISPLAY_NAME: f
        for f in dm.views.get_import_formats()}
    format_desc = import_formats.get(format_name)
    if format_desc is None:
        raise serializers.ValidationError(
            "Unknown input format '{}'".format(format_name))
    elif not format_desc.ENABLED:
        return Response(status=status.HTTP_405_METHOD_NOT_ALLOWED)

    queue = django_rq.get_queue("default")
    rq_job = queue.fetch_job(rq_id)

    if not rq_job:
        serializer = AnnotationFileSerializer(data=request.data)
        if serializer.is_valid(raise_exception=True):
            anno_file = serializer.validated_data['annotation_file']
            fd, filename = mkstemp(prefix='cvat_{}'.format(pk))
            # mkstemp() hands back an already open descriptor that the caller
            # must close. Wrap it so it is closed as soon as the upload has
            # been written. A descriptor number is only meaningful inside the
            # process that opened it, so it must not be stored in the job
            # meta and closed later by whichever process serves the poll.
            with os.fdopen(fd, 'wb') as f:
                for chunk in anno_file.chunks():
                    f.write(chunk)

            av_scan_paths(filename)
            rq_job = queue.enqueue_call(
                func=rq_func,
                args=(pk, filename, format_name),
                job_id=rq_id
            )
            # Only the path is needed to clean up after the job is done.
            rq_job.meta['tmp_file'] = filename
            rq_job.save_meta()
    elif rq_job.is_finished or rq_job.is_failed:
        # The job is over either way: drop the temporary file and the job.
        failed = not rq_job.is_finished
        os.remove(rq_job.meta['tmp_file'])
        exc_info = str(rq_job.exc_info) if failed else None
        rq_job.delete()
        if failed:
            return Response(data=exc_info,
                status=status.HTTP_500_INTERNAL_SERVER_ERROR)
        return Response(status=status.HTTP_201_CREATED)

    return Response(status=status.HTTP_202_ACCEPTED)
def import_task(filename, user):
    """Scan ``filename`` and import a task from it on behalf of ``user``.

    The path is first handed to ``av_scan_paths``; a ``TaskImporter`` built
    from ``filename`` and ``user`` then performs the import.

    Returns:
        The ``id`` attribute of the object returned by
        ``TaskImporter.import_task()``.
    """
    av_scan_paths(filename)
    imported_task = TaskImporter(filename, user).import_task()
    return imported_task.id
def _create_thread(tid, data):
    """Prepare the media of task ``tid`` and store the result in the database.

    Steps, in order: fetch remote / shared files into the upload directory,
    scan it, build a media extractor, then either record per-frame metadata
    for the cache storage method or write original and compressed chunks to
    disk, create the ``Image`` / ``Video`` rows, save a preview and finally
    call ``_save_task_to_db``.

    The function calls ``rq.get_current_job()`` and writes progress messages
    to ``job.meta['status']`` without a ``None`` check, so it expects to be
    executed inside an RQ job.

    Args:
        tid: primary key of the ``models.Task`` to fill.
        data: mapping; the keys read directly here are ``'remote_files'``,
            ``'server_files'``, ``'stop_frame'`` and ``'use_zip_chunks'``.
            ``data['remote_files']`` is overwritten with the result of
            ``_download_data``.

    Raises:
        NotImplementedError: the task data already has a non-zero size.
        AssertionError: a meta information file was supplied while the cache
            storage method is not in use.
        Exception: more than one media type contains files.
    """
    slogger.glob.info("create task #{}".format(tid))

    db_task = models.Task.objects.select_for_update().get(pk=tid)
    db_data = db_task.data
    # Data can be attached to a task only once.
    if db_task.data.size != 0:
        raise NotImplementedError("Adding more data is not implemented")

    upload_dir = db_data.get_upload_dirname()

    if data['remote_files']:
        data['remote_files'] = _download_data(data['remote_files'], upload_dir)

    # The list is passed to the helpers below and tested afterwards;
    # presumably they fill it with uploaded meta file names -- TODO confirm.
    meta_info_file = []
    media = _count_files(data, meta_info_file)
    media, task_mode = _validate_data(media, meta_info_file)
    if meta_info_file:
        # NOTE(review): `assert` is removed when Python runs with -O, so this
        # input validation would silently disappear in that mode.
        assert settings.USE_CACHE and db_data.storage_method == StorageMethodChoice.CACHE, \
            "File with meta information can be uploaded if 'Use cache' option is also selected"

    if data['server_files']:
        _copy_data_from_share(data['server_files'], upload_dir)

    av_scan_paths(upload_dir)

    job = rq.get_current_job()
    job.meta['status'] = 'Media files are being extracted...'
    job.save_meta()

    db_images = []
    extractor = None

    # Exactly one media type may contain files; it selects the extractor.
    for media_type, media_files in media.items():
        if media_files:
            if extractor is not None:
                raise Exception('Combined data types are not supported')
            extractor = MEDIA_TYPES[media_type]['extractor'](
                source_path=[os.path.join(upload_dir, f) for f in media_files],
                step=db_data.get_frame_step(),
                start=db_data.start_frame,
                stop=data['stop_frame'],
            )
    # NOTE(review): if no media type had files, `extractor` is still None here
    # and the attribute access below fails -- presumably _validate_data
    # prevents that; confirm.
    if extractor.__class__ == MEDIA_TYPES['zip']['extractor']:
        extractor.extract()
    db_task.mode = task_mode
    # Compressed chunks are video only for 'interpolation' tasks that did not
    # ask for zip chunks; original chunks are video for every 'interpolation' task.
    db_data.compressed_chunk_type = models.DataChoice.VIDEO if task_mode == 'interpolation' and not data[
        'use_zip_chunks'] else models.DataChoice.IMAGESET
    db_data.original_chunk_type = models.DataChoice.VIDEO if task_mode == 'interpolation' else models.DataChoice.IMAGESET

    def update_progress(progress):
        """Write a compression status message to the job meta.

        A truthy ``progress`` is shown as a rounded percentage
        (``progress * 100``); otherwise the next character of a four-step
        spinner is shown. The spinner position is kept on the function
        object itself and advanced on every call.
        """
        progress_animation = '|/-\\'
        if not hasattr(update_progress, 'call_counter'):
            update_progress.call_counter = 0

        status_template = 'Images are being compressed {}'
        if progress:
            current_progress = '{}%'.format(round(progress * 100))
        else:
            current_progress = '{}'.format(
                progress_animation[update_progress.call_counter])
        job.meta['status'] = status_template.format(current_progress)
        job.save_meta()
        update_progress.call_counter = (update_progress.call_counter + 1) % len(progress_animation)

    compressed_chunk_writer_class = Mpeg4CompressedChunkWriter if db_data.compressed_chunk_type == DataChoice.VIDEO else ZipCompressedChunkWriter
    original_chunk_writer_class = Mpeg4ChunkWriter if db_data.original_chunk_type == DataChoice.VIDEO else ZipChunkWriter

    compressed_chunk_writer = compressed_chunk_writer_class(
        db_data.image_quality)
    original_chunk_writer = original_chunk_writer_class(100)

    # calculate chunk size if it isn't specified
    if db_data.chunk_size is None:
        if isinstance(compressed_chunk_writer, ZipCompressedChunkWriter):
            # Scale inversely with the area of the first image: 36 frames for
            # a 1920x1080 image, clamped to the range [2, 72].
            w, h = extractor.get_image_size(0)
            area = h * w
            db_data.chunk_size = max(2, min(72, 36 * 1920 * 1080 // area))
        else:
            db_data.chunk_size = 36

    video_path = ""
    video_size = (0, 0)

    # Cache storage method: no chunks are written here, only the metadata
    # needed later (video meta information, or dummy chunk files for images).
    if settings.USE_CACHE and db_data.storage_method == StorageMethodChoice.CACHE:
        for media_type, media_files in media.items():

            if not media_files:
                continue

            if task_mode == MEDIA_TYPES['video']['mode']:
                try:
                    if meta_info_file:
                        try:
                            from cvat.apps.engine.prepare import UploadedMeta
                            # The meta file name has a directory component:
                            # move the file to the path returned by get_meta_path().
                            if os.path.split(meta_info_file[0])[0]:
                                os.replace(
                                    os.path.join(upload_dir, meta_info_file[0]),
                                    db_data.get_meta_path())
                            meta_info = UploadedMeta(
                                source_path=os.path.join(
                                    upload_dir, media_files[0]),
                                meta_path=db_data.get_meta_path())
                            meta_info.check_seek_key_frames()
                            meta_info.check_frames_numbers()
                            meta_info.save_meta_info()
                            assert len(
                                meta_info.key_frames) > 0, 'No key frames.'
                        except Exception as ex:
                            # The uploaded meta file could not be used:
                            # report it and build the meta information from the video.
                            base_msg = str(ex) if isinstance(ex, AssertionError) else \
                                'Invalid meta information was upload.'
                            job.meta[
                                'status'] = '{} Start prepare valid meta information.'.format(
                                    base_msg)
                            job.save_meta()
                            meta_info, smooth_decoding = prepare_meta(
                                media_file=media_files[0],
                                upload_dir=upload_dir,
                                chunk_size=db_data.chunk_size)
                            assert smooth_decoding == True, 'Too few keyframes for smooth video decoding.'
                    else:
                        meta_info, smooth_decoding = prepare_meta(
                            media_file=media_files[0],
                            upload_dir=upload_dir,
                            chunk_size=db_data.chunk_size)
                        assert smooth_decoding == True, 'Too few keyframes for smooth video decoding.'

                    all_frames = meta_info.get_task_size()
                    video_size = meta_info.frame_sizes

                    # Number of frames selected by start / stop / step, where
                    # the stop is capped by the number of frames in the video.
                    db_data.size = len(
                        range(
                            db_data.start_frame,
                            min(
                                data['stop_frame'] +
                                1 if data['stop_frame'] else all_frames,
                                all_frames), db_data.get_frame_step()))
                    video_path = os.path.join(upload_dir, media_files[0])
                except Exception as ex:
                    # Any failure above switches the data to the FILE_SYSTEM
                    # storage method, so the chunk-writing block below runs.
                    db_data.storage_method = StorageMethodChoice.FILE_SYSTEM
                    if os.path.exists(db_data.get_meta_path()):
                        os.remove(db_data.get_meta_path())
                    base_msg = str(ex) if isinstance(
                        ex, AssertionError
                    ) else "Uploaded video does not support a quick way of task creating."
                    job.meta[
                        'status'] = "{} The task will be created using the old method".format(
                            base_msg)
                    job.save_meta()
            else:  # images, archive
                db_data.size = len(extractor)

                # The running counter groups consecutive frames into chunks
                # of db_data.chunk_size items.
                counter = itertools.count()
                for chunk_number, chunk_frames in itertools.groupby(
                        extractor.frame_range,
                        lambda x: next(counter) // db_data.chunk_size):
                    chunk_paths = [(extractor.get_path(i), i)
                                   for i in chunk_frames]
                    img_sizes = []
                    # The dummy chunk file lists one source path per line.
                    with open(db_data.get_dummy_chunk_path(chunk_number),
                              'w') as dummy_chunk:
                        for path, frame_id in chunk_paths:
                            dummy_chunk.write(path + '\n')
                            img_sizes.append(
                                extractor.get_image_size(frame_id))

                    db_images.extend([
                        models.Image(data=db_data,
                                     path=os.path.relpath(path, upload_dir),
                                     frame=frame,
                                     width=w,
                                     height=h)
                        for (path, frame), (w, h) in zip(chunk_paths, img_sizes)
                    ])

    # File-system storage method (chosen up front or as the fallback above):
    # write every chunk in both original and compressed form.
    if db_data.storage_method == StorageMethodChoice.FILE_SYSTEM or not settings.USE_CACHE:
        counter = itertools.count()
        generator = itertools.groupby(
            extractor, lambda x: next(counter) // db_data.chunk_size)
        for chunk_idx, chunk_data in generator:
            chunk_data = list(chunk_data)
            original_chunk_path = db_data.get_original_chunk_path(chunk_idx)
            original_chunk_writer.save_as_chunk(chunk_data, original_chunk_path)

            compressed_chunk_path = db_data.get_compressed_chunk_path(
                chunk_idx)
            img_sizes = compressed_chunk_writer.save_as_chunk(
                chunk_data, compressed_chunk_path)

            if db_task.mode == 'annotation':
                # Items yielded by the extractor are indexed here as
                # [1] -> path and [2] -> frame number.
                db_images.extend([
                    models.Image(data=db_data,
                                 path=os.path.relpath(data[1], upload_dir),
                                 frame=data[2],
                                 width=size[0],
                                 height=size[1])
                    for data, size in zip(chunk_data, img_sizes)
                ])
            else:
                video_size = img_sizes[0]
                video_path = chunk_data[0][1]

            db_data.size += len(chunk_data)
            progress = extractor.get_progress(chunk_data[-1][2])
            update_progress(progress)

    if db_task.mode == 'annotation':
        models.Image.objects.bulk_create(db_images)
        db_images = []
    else:
        models.Video.objects.create(data=db_data,
                                    path=os.path.relpath(
                                        video_path, upload_dir),
                                    width=video_size[0],
                                    height=video_size[1])

    # A stop frame of 0 is replaced by the index of the last selected frame.
    if db_data.stop_frame == 0:
        db_data.stop_frame = db_data.start_frame + (
            db_data.size - 1) * db_data.get_frame_step()

    preview = extractor.get_preview()
    preview.save(db_data.get_preview_path())

    slogger.glob.info("Found frames {} for Data #{}".format(
        db_data.size, db_data.id))
    _save_task_to_db(db_task)
def _create_thread(tid, data):
    """Prepare the media of task ``tid`` and store the result in the database.

    Steps, in order: fetch remote / shared files, scan the upload directory,
    build a media extractor, detect the data dimension for zip input, then
    either prepare a manifest for the cache storage method or write original
    and compressed chunks to disk, create the ``Image`` / ``RelatedFile`` /
    ``Video`` rows, save a preview and finally call ``_save_task_to_db``.

    The function calls ``rq.get_current_job()`` and writes progress messages
    to ``job.meta['status']`` without a ``None`` check, so it expects to be
    executed inside an RQ job.

    Args:
        tid: primary key of the ``models.Task`` to fill.
        data: mapping; the keys read directly here are ``'remote_files'``,
            ``'server_files'``, ``'stop_frame'`` and ``'use_zip_chunks'``.
            ``data['remote_files']`` is overwritten with the result of
            ``_download_data``.

    Raises:
        NotImplementedError: the task data already has a non-zero size.
        AssertionError: a manifest file was supplied while the cache storage
            method is not in use.
        Exception: more than one media type contains files.
    """
    slogger.glob.info("create task #{}".format(tid))

    db_task = models.Task.objects.select_for_update().get(pk=tid)
    db_data = db_task.data
    # Data can be attached to a task only once.
    if db_task.data.size != 0:
        raise NotImplementedError("Adding more data is not implemented")

    upload_dir = db_data.get_upload_dirname()

    if data['remote_files']:
        data['remote_files'] = _download_data(data['remote_files'], upload_dir)

    # The list is passed to the helpers below and tested afterwards;
    # presumably they fill it with uploaded manifest file names -- TODO confirm.
    manifest_file = []
    media = _count_files(data, manifest_file)
    media, task_mode = _validate_data(media, manifest_file)
    if manifest_file:
        # NOTE(review): `assert` is removed when Python runs with -O, so this
        # input validation would silently disappear in that mode.
        assert settings.USE_CACHE and db_data.storage_method == StorageMethodChoice.CACHE, \
            "File with meta information can be uploaded if 'Use cache' option is also selected"

    if data['server_files']:
        if db_data.storage == StorageChoice.LOCAL:
            _copy_data_from_share(data['server_files'], upload_dir)
        else:
            # Files are not copied: they are read from the share root in place.
            upload_dir = settings.SHARE_ROOT

    av_scan_paths(upload_dir)

    job = rq.get_current_job()
    job.meta['status'] = 'Media files are being extracted...'
    job.save_meta()

    db_images = []
    extractor = None

    # Exactly one media type may contain files; it selects the extractor.
    for media_type, media_files in media.items():
        if media_files:
            if extractor is not None:
                raise Exception('Combined data types are not supported')
            source_paths=[os.path.join(upload_dir, f) for f in media_files]
            # Archives located on the share get the task's own upload
            # directory appended to the source paths; from here on the data
            # is treated as LOCAL and upload_dir points to that directory.
            if media_type in {'archive', 'zip'} and db_data.storage == StorageChoice.SHARE:
                source_paths.append(db_data.get_upload_dirname())
                upload_dir = db_data.get_upload_dirname()
                db_data.storage = StorageChoice.LOCAL
            extractor = MEDIA_TYPES[media_type]['extractor'](
                source_path=source_paths,
                step=db_data.get_frame_step(),
                start=db_data.start_frame,
                stop=data['stop_frame'],
            )

    validate_dimension = ValidateDimension()
    # NOTE(review): if no media type had files, `extractor` is still None here
    # -- presumably _validate_data prevents that; confirm.
    if extractor.__class__ == MEDIA_TYPES['zip']['extractor']:
        extractor.extract()
        # Dimension detection runs on the directory containing the zip file.
        validate_dimension.set_path(os.path.split(extractor.get_zip_filename())[0])
        validate_dimension.validate()
        if validate_dimension.dimension == DimensionType.DIM_3D:
            db_task.dimension = DimensionType.DIM_3D

            extractor.reconcile(
                source_files=list(validate_dimension.related_files.keys()),
                step=db_data.get_frame_step(),
                start=db_data.start_frame,
                stop=data['stop_frame'],
                dimension=DimensionType.DIM_3D,
            )
            extractor.add_files(validate_dimension.converted_files)

    db_task.mode = task_mode
    # Compressed chunks are video only for 'interpolation' tasks that did not
    # ask for zip chunks; original chunks are video for every 'interpolation' task.
    db_data.compressed_chunk_type = models.DataChoice.VIDEO if task_mode == 'interpolation' and not data['use_zip_chunks'] else models.DataChoice.IMAGESET
    db_data.original_chunk_type = models.DataChoice.VIDEO if task_mode == 'interpolation' else models.DataChoice.IMAGESET

    def update_progress(progress):
        """Write a compression status message to the job meta.

        A truthy ``progress`` is shown as a rounded percentage
        (``progress * 100``); otherwise the next character of a four-step
        spinner is shown. The spinner position is kept on the function
        object itself and advanced on every call.
        """
        progress_animation = '|/-\\'
        if not hasattr(update_progress, 'call_counter'):
            update_progress.call_counter = 0

        status_template = 'Images are being compressed {}'
        if progress:
            current_progress = '{}%'.format(round(progress * 100))
        else:
            current_progress = '{}'.format(progress_animation[update_progress.call_counter])
        job.meta['status'] = status_template.format(current_progress)
        job.save_meta()
        update_progress.call_counter = (update_progress.call_counter + 1) % len(progress_animation)

    compressed_chunk_writer_class = Mpeg4CompressedChunkWriter if db_data.compressed_chunk_type == DataChoice.VIDEO else ZipCompressedChunkWriter
    if db_data.original_chunk_type == DataChoice.VIDEO:
        original_chunk_writer_class = Mpeg4ChunkWriter
        # Let's use QP=17 (that is 67 for 0-100 range) for the original chunks, which should be visually lossless or nearly so.
        # A lower value will significantly increase the chunk size with a slight increase of quality.
        original_quality = 67
    else:
        original_chunk_writer_class = ZipChunkWriter
        original_quality = 100

    # The compressed writer receives the dimension only for 3D data.
    kwargs = {}
    if validate_dimension.dimension == DimensionType.DIM_3D:
        kwargs["dimension"] = validate_dimension.dimension
    compressed_chunk_writer = compressed_chunk_writer_class(db_data.image_quality, **kwargs)
    original_chunk_writer = original_chunk_writer_class(original_quality)

    # calculate chunk size if it isn't specified
    if db_data.chunk_size is None:
        if isinstance(compressed_chunk_writer, ZipCompressedChunkWriter):
            # Scale inversely with the area of the first image: 36 frames for
            # a 1920x1080 image, clamped to the range [2, 72].
            w, h = extractor.get_image_size(0)
            area = h * w
            db_data.chunk_size = max(2, min(72, 36 * 1920 * 1080 // area))
        else:
            db_data.chunk_size = 36

    video_path = ""
    video_size = (0, 0)

    def _update_status(msg):
        """Store ``msg`` as the job status and persist the job meta."""
        job.meta['status'] = msg
        job.save_meta()

    # Cache storage method: no chunks are written here, only a manifest
    # (and, for images, the Image records) is prepared.
    if settings.USE_CACHE and db_data.storage_method == StorageMethodChoice.CACHE:
        for media_type, media_files in media.items():

            if not media_files:
                continue

            # replace manifest file (e.g was uploaded 'subdir/manifest.jsonl')
            if manifest_file and not os.path.exists(db_data.get_manifest_path()):
                shutil.copyfile(os.path.join(upload_dir, manifest_file[0]),
                    db_data.get_manifest_path())
                # The source copy is kept when it lives on the share root.
                if upload_dir != settings.SHARE_ROOT:
                    os.remove(os.path.join(upload_dir, manifest_file[0]))

            if task_mode == MEDIA_TYPES['video']['mode']:
                try:
                    manifest_is_prepared = False
                    if manifest_file:
                        try:
                            manifest = VideoManifestValidator(source_path=os.path.join(upload_dir, media_files[0]),
                                                              manifest_path=db_data.get_manifest_path())
                            manifest.init_index()
                            manifest.validate_seek_key_frames()
                            manifest.validate_frame_numbers()
                            assert len(manifest) > 0, 'No key frames.'

                            all_frames = manifest['properties']['length']
                            video_size = manifest['properties']['resolution']
                            manifest_is_prepared = True
                        except Exception as ex:
                            # The uploaded manifest could not be used: drop
                            # its index and fall through to regeneration below.
                            if os.path.exists(db_data.get_index_path()):
                                os.remove(db_data.get_index_path())
                            if isinstance(ex, AssertionError):
                                base_msg = str(ex)
                            else:
                                base_msg = 'Invalid manifest file was upload.'
                                slogger.glob.warning(str(ex))
                            _update_status('{} Start prepare a valid manifest file.'.format(base_msg))

                    if not manifest_is_prepared:
                        _update_status('Start prepare a manifest file')
                        manifest = VideoManifestManager(db_data.get_manifest_path())
                        meta_info = manifest.prepare_meta(
                            media_file=media_files[0],
                            upload_dir=upload_dir,
                            chunk_size=db_data.chunk_size
                        )
                        manifest.create(meta_info)
                        manifest.init_index()
                        _update_status('A manifest had been created')

                        all_frames = meta_info.get_size()
                        video_size = meta_info.frame_sizes
                        manifest_is_prepared = True

                    # Number of frames selected by start / stop / step, where
                    # the stop is capped by the number of frames in the video.
                    db_data.size = len(range(db_data.start_frame, min(data['stop_frame'] + 1 \
                        if data['stop_frame'] else all_frames, all_frames), db_data.get_frame_step()))
                    video_path = os.path.join(upload_dir, media_files[0])
                except Exception as ex:
                    # Any failure above switches the data to the FILE_SYSTEM
                    # storage method, so the chunk-writing block below runs.
                    db_data.storage_method = StorageMethodChoice.FILE_SYSTEM
                    if os.path.exists(db_data.get_manifest_path()):
                        os.remove(db_data.get_manifest_path())
                    if os.path.exists(db_data.get_index_path()):
                        os.remove(db_data.get_index_path())
                    base_msg = str(ex) if isinstance(ex, AssertionError) \
                        else "Uploaded video does not support a quick way of task creating."
                    _update_status("{} The task will be created using the old method".format(base_msg))
            else:  # images, archive, pdf
                db_data.size = len(extractor)
                manifest = ImageManifestManager(db_data.get_manifest_path())
                if not manifest_file:
                    # No manifest was uploaded: create one from the sources.
                    if db_task.dimension == DimensionType.DIM_2D:
                        meta_info = manifest.prepare_meta(
                            sources=extractor.absolute_source_paths,
                            data_dir=upload_dir
                        )
                        content = meta_info.content
                    else:
                        # For non-2D data only name and extension are stored.
                        content = []
                        for source in extractor.absolute_source_paths:
                            name, ext = os.path.splitext(os.path.relpath(source, upload_dir))
                            content.append({
                                'name': name,
                                'extension': ext
                            })
                    manifest.create(content)
                manifest.init_index()
                # The running counter groups consecutive frames into chunks
                # of db_data.chunk_size items.
                counter = itertools.count()
                for _, chunk_frames in itertools.groupby(extractor.frame_range, lambda x: next(counter) // db_data.chunk_size):
                    chunk_paths = [(extractor.get_path(i), i) for i in chunk_frames]
                    img_sizes = []

                    for _, frame_id in chunk_paths:
                        properties = manifest[frame_id]
                        # 2D sizes come from the manifest entry, other sizes
                        # are asked from the extractor.
                        if db_task.dimension == DimensionType.DIM_2D:
                            resolution = (properties['width'], properties['height'])
                        else:
                            resolution = extractor.get_image_size(frame_id)
                        img_sizes.append(resolution)

                    db_images.extend([
                        models.Image(data=db_data,
                            path=os.path.relpath(path, upload_dir),
                            frame=frame, width=w, height=h)
                        for (path, frame), (w, h) in zip(chunk_paths, img_sizes)
                    ])

    # File-system storage method (chosen up front or as the fallback above):
    # write every chunk in both original and compressed form.
    if db_data.storage_method == StorageMethodChoice.FILE_SYSTEM or not settings.USE_CACHE:
        counter = itertools.count()
        generator = itertools.groupby(extractor, lambda x: next(counter) // db_data.chunk_size)
        for chunk_idx, chunk_data in generator:
            chunk_data = list(chunk_data)
            original_chunk_path = db_data.get_original_chunk_path(chunk_idx)
            original_chunk_writer.save_as_chunk(chunk_data, original_chunk_path)

            compressed_chunk_path = db_data.get_compressed_chunk_path(chunk_idx)
            img_sizes = compressed_chunk_writer.save_as_chunk(chunk_data, compressed_chunk_path)

            if db_task.mode == 'annotation':
                # Items yielded by the extractor are indexed here as
                # [1] -> path and [2] -> frame number.
                db_images.extend([
                    models.Image(
                        data=db_data,
                        path=os.path.relpath(data[1], upload_dir),
                        frame=data[2],
                        width=size[0],
                        height=size[1])

                    for data, size in zip(chunk_data, img_sizes)
                ])
            else:
                video_size = img_sizes[0]
                video_path = chunk_data[0][1]

            db_data.size += len(chunk_data)
            progress = extractor.get_progress(chunk_data[-1][2])
            update_progress(progress)

    if db_task.mode == 'annotation':
        if validate_dimension.dimension == DimensionType.DIM_2D:
            models.Image.objects.bulk_create(db_images)
        else:
            # Images are saved one by one so that each has an id to attach
            # its related files to.
            related_file = []
            for image_data in db_images:
                image_model = models.Image(
                    data=image_data.data,
                    path=image_data.path,
                    frame=image_data.frame,
                    width=image_data.width,
                    height=image_data.height
                )

                image_model.save()
                # NOTE(review): the row that was just saved is fetched again
                # by id; `image_model` may already carry the same values -- confirm.
                image_data = models.Image.objects.get(id=image_model.id)

                if validate_dimension.related_files.get(image_data.path, None):
                    for related_image_file in validate_dimension.related_files[image_data.path]:
                        related_file.append(
                            RelatedFile(data=db_data, primary_image_id=image_data.id, path=related_image_file))
            RelatedFile.objects.bulk_create(related_file)
        db_images = []
    else:
        models.Video.objects.create(
            data=db_data,
            path=os.path.relpath(video_path, upload_dir),
            width=video_size[0], height=video_size[1])

    # A stop frame of 0 is replaced by the index of the last selected frame;
    # any other value is capped by that index.
    if db_data.stop_frame == 0:
        db_data.stop_frame = db_data.start_frame + (db_data.size - 1) * db_data.get_frame_step()
    else:
        # validate stop_frame
        db_data.stop_frame = min(db_data.stop_frame, \
            db_data.start_frame + (db_data.size - 1) * db_data.get_frame_step())

    preview = extractor.get_preview()
    preview.save(db_data.get_preview_path())

    slogger.glob.info("Found frames {} for Data #{}".format(db_data.size, db_data.id))
    _save_task_to_db(db_task)
def _create_thread(tid, data, isImport=False):
    """Prepare the media of task ``tid`` and store the result in the database.

    Steps, in order: fetch remote / shared / cloud files, scan the upload
    directory, build a media extractor, detect the data dimension and the
    related images, then either prepare a manifest for the cache storage
    method or write original and compressed chunks to disk, create the
    ``Image`` / ``RelatedFile`` / ``Video`` rows, save a preview and finally
    call ``_save_task_to_db``.

    The function calls ``rq.get_current_job()`` and writes progress messages
    to ``job.meta['status']`` without a ``None`` check, so it expects to be
    executed inside an RQ job.

    Args:
        tid: primary key of the ``models.Task`` to fill.
        data: mapping; the keys read directly here are ``'remote_files'``,
            ``'server_files'``, ``'stop_frame'`` and ``'use_zip_chunks'``.
            ``data['remote_files']`` may be overwritten with the result of
            ``_download_data`` and ``data['stop_frame']`` may be reset to
            ``None`` (see ``isImport``).
        isImport: when true and the input is images stored on the share, the
            frame selection (start frame, stop frame, frame filter) is reset
            and applied through the manifest frame indexer instead.

    Raises:
        AssertionError: a manifest file was supplied while the cache storage
            method is not in use.
        Exception: more than one media type contains files, a manifest is
            missing for cloud storage data, or the detected dimension differs
            from the one of the first task of the same project.
    """
    slogger.glob.info("create task #{}".format(tid))

    db_task = models.Task.objects.select_for_update().get(pk=tid)
    db_data = db_task.data
    upload_dir = db_data.get_upload_dirname()

    if data['remote_files']:
        # Remote files of cloud storage data are not downloaded here.
        if db_data.storage != models.StorageChoice.CLOUD_STORAGE:
            data['remote_files'] = _download_data(data['remote_files'], upload_dir)

    # The list is passed to the helpers below and tested afterwards;
    # presumably they fill it with uploaded manifest file names -- TODO confirm.
    manifest_file = []
    media = _count_files(data, manifest_file)
    media, task_mode = _validate_data(media, manifest_file)
    if manifest_file:
        # NOTE(review): `assert` is removed when Python runs with -O, so this
        # input validation would silently disappear in that mode.
        assert settings.USE_CACHE and db_data.storage_method == models.StorageMethodChoice.CACHE, \
            "File with meta information can be uploaded if 'Use cache' option is also selected"

    if data['server_files']:
        if db_data.storage == models.StorageChoice.LOCAL:
            _copy_data_from_share(data['server_files'], upload_dir)
        elif db_data.storage == models.StorageChoice.SHARE:
            # Files are not copied: they are read from the share root in place.
            upload_dir = settings.SHARE_ROOT
        else:  # cloud storage
            if not manifest_file:
                raise Exception('A manifest file not found')
            db_cloud_storage = db_data.cloud_storage
            credentials = Credentials()
            credentials.convert_from_db({
                'type': db_cloud_storage.credentials_type,
                'value': db_cloud_storage.credentials,
            })

            details = {
                'resource': db_cloud_storage.resource,
                'credentials': credentials,
                'specific_attributes': db_cloud_storage.get_specific_attributes()
            }
            cloud_storage_instance = get_cloud_storage_instance(cloud_provider=db_cloud_storage.provider_type, **details)
            # Only the first image (in sorted order) is downloaded here.
            first_sorted_media_image = sorted(media['image'])[0]
            cloud_storage_instance.download_file(first_sorted_media_image, os.path.join(upload_dir, first_sorted_media_image))

            # prepare task manifest file from cloud storage manifest file
            manifest = ImageManifestManager(db_data.get_manifest_path())
            cloud_storage_manifest = ImageManifestManager(
                os.path.join(db_data.cloud_storage.get_storage_dirname(), manifest_file[0])
            )
            cloud_storage_manifest.set_index()
            media_files = sorted(media['image'])
            content = cloud_storage_manifest.get_subset(media_files)
            manifest.create(content)
            manifest.init_index()

    av_scan_paths(upload_dir)

    job = rq.get_current_job()
    job.meta['status'] = 'Media files are being extracted...'
    job.save_meta()

    db_images = []
    extractor = None
    # Called with a frame id to obtain the index used to read the manifest.
    manifest_index = _get_manifest_frame_indexer()

    # If upload from server_files image and directories
    # need to update images list by all found images in directories
    if (data['server_files']) and len(media['directory']) and len(media['image']):
        media['image'].extend(
            [os.path.relpath(image, upload_dir) for image in
                MEDIA_TYPES['directory']['extractor'](
                    source_path=[os.path.join(upload_dir, f) for f in media['directory']],
                ).absolute_source_paths
            ]
        )
        media['directory'] = []

    # Exactly one media type may contain files; it selects the extractor.
    for media_type, media_files in media.items():
        if media_files:
            if extractor is not None:
                raise Exception('Combined data types are not supported')
            source_paths=[os.path.join(upload_dir, f) for f in media_files]
            # Archives located on the share get the task's own upload
            # directory appended to the source paths; from here on the data
            # is treated as LOCAL and upload_dir points to that directory.
            if media_type in {'archive', 'zip'} and db_data.storage == models.StorageChoice.SHARE:
                source_paths.append(db_data.get_upload_dirname())
                upload_dir = db_data.get_upload_dirname()
                db_data.storage = models.StorageChoice.LOCAL
            # Import of images from the share: keep the original start frame
            # and step in the manifest indexer and reset the frame selection
            # on the data itself.
            if isImport and media_type == 'image' and db_data.storage == models.StorageChoice.SHARE:
                manifest_index = _get_manifest_frame_indexer(db_data.start_frame, db_data.get_frame_step())
                db_data.start_frame = 0
                data['stop_frame'] = None
                db_data.frame_filter = ''

            extractor = MEDIA_TYPES[media_type]['extractor'](
                source_path=source_paths,
                step=db_data.get_frame_step(),
                start=db_data.start_frame,
                stop=data['stop_frame'],
            )

    validate_dimension = ValidateDimension()
    if isinstance(extractor, MEDIA_TYPES['zip']['extractor']):
        extractor.extract()

    # Dimension detection runs only for local data, or for zip input on the share.
    if db_data.storage == models.StorageChoice.LOCAL or \
        (db_data.storage == models.StorageChoice.SHARE and \
        isinstance(extractor, MEDIA_TYPES['zip']['extractor'])):
        validate_dimension.set_path(upload_dir)
        validate_dimension.validate()

    # Compare with the first task of the project when the project has more
    # than one task.
    if db_task.project is not None and db_task.project.tasks.count() > 1 and db_task.project.tasks.first().dimension != validate_dimension.dimension:
        raise Exception(f'Dimension ({validate_dimension.dimension}) of the task must be the same as other tasks in project ({db_task.project.tasks.first().dimension})')

    if validate_dimension.dimension == models.DimensionType.DIM_3D:
        db_task.dimension = models.DimensionType.DIM_3D

        extractor.reconcile(
            source_files=[os.path.join(upload_dir, f) for f in validate_dimension.related_files.keys()],
            step=db_data.get_frame_step(),
            start=db_data.start_frame,
            stop=data['stop_frame'],
            dimension=models.DimensionType.DIM_3D,
        )

    related_images = {}
    if isinstance(extractor, MEDIA_TYPES['image']['extractor']):
        # Drop every path that has a 'related_images' directory component,
        # then detect the related images of the remaining sources.
        extractor.filter(lambda x: not re.search(r'(^|{0})related_images{0}'.format(os.sep), x))
        related_images = detect_related_images(extractor.absolute_source_paths, upload_dir)

    db_task.mode = task_mode
    # Compressed chunks are video only for 'interpolation' tasks that did not
    # ask for zip chunks; original chunks are video for every 'interpolation' task.
    db_data.compressed_chunk_type = models.DataChoice.VIDEO if task_mode == 'interpolation' and not data['use_zip_chunks'] else models.DataChoice.IMAGESET
    db_data.original_chunk_type = models.DataChoice.VIDEO if task_mode == 'interpolation' else models.DataChoice.IMAGESET

    def update_progress(progress):
        """Write a compression status message to the job meta.

        A truthy ``progress`` is shown as a rounded percentage
        (``progress * 100``); otherwise the next character of a four-step
        spinner is shown. The spinner position is kept on the function
        object itself and advanced on every call.
        """
        progress_animation = '|/-\\'
        if not hasattr(update_progress, 'call_counter'):
            update_progress.call_counter = 0

        status_template = 'Images are being compressed {}'
        if progress:
            current_progress = '{}%'.format(round(progress * 100))
        else:
            current_progress = '{}'.format(progress_animation[update_progress.call_counter])
        job.meta['status'] = status_template.format(current_progress)
        job.save_meta()
        update_progress.call_counter = (update_progress.call_counter + 1) % len(progress_animation)

    compressed_chunk_writer_class = Mpeg4CompressedChunkWriter if db_data.compressed_chunk_type == models.DataChoice.VIDEO else ZipCompressedChunkWriter
    if db_data.original_chunk_type == models.DataChoice.VIDEO:
        original_chunk_writer_class = Mpeg4ChunkWriter
        # Let's use QP=17 (that is 67 for 0-100 range) for the original chunks, which should be visually lossless or nearly so.
        # A lower value will significantly increase the chunk size with a slight increase of quality.
        original_quality = 67
    else:
        original_chunk_writer_class = ZipChunkWriter
        original_quality = 100

    # The compressed writer receives the dimension only for 3D data.
    kwargs = {}
    if validate_dimension.dimension == models.DimensionType.DIM_3D:
        kwargs["dimension"] = validate_dimension.dimension
    compressed_chunk_writer = compressed_chunk_writer_class(db_data.image_quality, **kwargs)
    original_chunk_writer = original_chunk_writer_class(original_quality)

    # calculate chunk size if it isn't specified
    if db_data.chunk_size is None:
        if isinstance(compressed_chunk_writer, ZipCompressedChunkWriter):
            if not (db_data.storage == models.StorageChoice.CLOUD_STORAGE):
                w, h = extractor.get_image_size(0)
            else:
                # NOTE(review): `manifest` is bound only in the cloud storage
                # branch guarded by data['server_files'] above; this line
                # assumes that branch has run -- confirm.
                img_properties = manifest[0]
                w, h = img_properties['width'], img_properties['height']
            # Scale inversely with the image area: 36 frames for a 1920x1080
            # image, clamped to the range [2, 72].
            area = h * w
            db_data.chunk_size = max(2, min(72, 36 * 1920 * 1080 // area))
        else:
            db_data.chunk_size = 36

    video_path = ""
    video_size = (0, 0)

    def _update_status(msg):
        """Store ``msg`` as the job status and persist the job meta."""
        job.meta['status'] = msg
        job.save_meta()

    # Cache storage method: no chunks are written here, only a manifest
    # (and, for images, the Image records) is prepared.
    if settings.USE_CACHE and db_data.storage_method == models.StorageMethodChoice.CACHE:
        for media_type, media_files in media.items():

            if not media_files:
                continue

            # replace manifest file (e.g was uploaded 'subdir/manifest.jsonl')
            if manifest_file and not os.path.exists(db_data.get_manifest_path()):
                shutil.copyfile(os.path.join(upload_dir, manifest_file[0]),
                    db_data.get_manifest_path())
                # The source copy is kept when it lives on the share root.
                if upload_dir != settings.SHARE_ROOT:
                    os.remove(os.path.join(upload_dir, manifest_file[0]))

            if task_mode == MEDIA_TYPES['video']['mode']:
                try:
                    manifest_is_prepared = False
                    if manifest_file:
                        try:
                            manifest = VideoManifestValidator(source_path=os.path.join(upload_dir, media_files[0]),
                                                              manifest_path=db_data.get_manifest_path())
                            manifest.init_index()
                            manifest.validate_seek_key_frames()
                            manifest.validate_frame_numbers()
                            assert len(manifest) > 0, 'No key frames.'

                            all_frames = manifest.video_length
                            video_size = manifest.video_resolution
                            manifest_is_prepared = True
                        except Exception as ex:
                            # The uploaded manifest could not be used: drop
                            # its index and fall through to regeneration below.
                            if os.path.exists(db_data.get_index_path()):
                                os.remove(db_data.get_index_path())
                            if isinstance(ex, AssertionError):
                                base_msg = str(ex)
                            else:
                                base_msg = 'Invalid manifest file was upload.'
                                slogger.glob.warning(str(ex))
                            _update_status('{} Start prepare a valid manifest file.'.format(base_msg))

                    if not manifest_is_prepared:
                        _update_status('Start prepare a manifest file')
                        manifest = VideoManifestManager(db_data.get_manifest_path())
                        meta_info = manifest.prepare_meta(
                            media_file=media_files[0],
                            upload_dir=upload_dir,
                            chunk_size=db_data.chunk_size
                        )
                        manifest.create(meta_info)
                        manifest.init_index()
                        _update_status('A manifest had been created')

                        all_frames = meta_info.get_size()
                        video_size = meta_info.frame_sizes
                        manifest_is_prepared = True

                    # Number of frames selected by start / stop / step, where
                    # the stop is capped by the number of frames in the video.
                    db_data.size = len(range(db_data.start_frame, min(data['stop_frame'] + 1 \
                        if data['stop_frame'] else all_frames, all_frames), db_data.get_frame_step()))
                    video_path = os.path.join(upload_dir, media_files[0])
                except Exception as ex:
                    # Any failure above switches the data to the FILE_SYSTEM
                    # storage method, so the chunk-writing block below runs.
                    db_data.storage_method = models.StorageMethodChoice.FILE_SYSTEM
                    if os.path.exists(db_data.get_manifest_path()):
                        os.remove(db_data.get_manifest_path())
                    if os.path.exists(db_data.get_index_path()):
                        os.remove(db_data.get_index_path())
                    base_msg = str(ex) if isinstance(ex, AssertionError) \
                        else "Uploaded video does not support a quick way of task creating."
                    _update_status("{} The task will be created using the old method".format(base_msg))
            else:  # images, archive, pdf
                db_data.size = len(extractor)
                manifest = ImageManifestManager(db_data.get_manifest_path())
                if not manifest_file:
                    # No manifest was uploaded: create one from the sources.
                    if db_task.dimension == models.DimensionType.DIM_2D:
                        meta_info = manifest.prepare_meta(
                            sources=extractor.absolute_source_paths,
                            meta={ k: {'related_images': related_images[k] } for k in related_images },
                            data_dir=upload_dir
                        )
                        content = meta_info.content
                    else:
                        content = []
                        for source in extractor.absolute_source_paths:
                            name, ext = os.path.splitext(os.path.relpath(source, upload_dir))
                            # NOTE(review): related_images is indexed with the
                            # relative path here; presumably every source has
                            # an entry, otherwise this raises KeyError -- confirm.
                            content.append({
                                'name': name,
                                'meta': { 'related_images': related_images[''.join((name, ext))] },
                                'extension': ext
                            })
                    manifest.create(content)
                manifest.init_index()
                # The running counter groups consecutive frames into chunks
                # of db_data.chunk_size items.
                counter = itertools.count()
                for _, chunk_frames in itertools.groupby(extractor.frame_range, lambda x: next(counter) // db_data.chunk_size):
                    chunk_paths = [(extractor.get_path(i), i) for i in chunk_frames]
                    img_sizes = []

                    for _, frame_id in chunk_paths:
                        properties = manifest[manifest_index(frame_id)]
                        # 2D sizes come from the manifest entry, other sizes
                        # are asked from the extractor.
                        if db_task.dimension == models.DimensionType.DIM_2D:
                            resolution = (properties['width'], properties['height'])
                        else:
                            resolution = extractor.get_image_size(frame_id)
                        img_sizes.append(resolution)

                    db_images.extend([
                        models.Image(data=db_data,
                            path=os.path.relpath(path, upload_dir),
                            frame=frame, width=w, height=h)
                        for (path, frame), (w, h) in zip(chunk_paths, img_sizes)
                    ])

    # File-system storage method (chosen up front or as the fallback above):
    # write every chunk in both original and compressed form.
    if db_data.storage_method == models.StorageMethodChoice.FILE_SYSTEM or not settings.USE_CACHE:
        counter = itertools.count()
        generator = itertools.groupby(extractor, lambda x: next(counter) // db_data.chunk_size)
        for chunk_idx, chunk_data in generator:
            chunk_data = list(chunk_data)
            original_chunk_path = db_data.get_original_chunk_path(chunk_idx)
            original_chunk_writer.save_as_chunk(chunk_data, original_chunk_path)

            compressed_chunk_path = db_data.get_compressed_chunk_path(chunk_idx)
            img_sizes = compressed_chunk_writer.save_as_chunk(chunk_data, compressed_chunk_path)

            if db_task.mode == 'annotation':
                # Items yielded by the extractor are indexed here as
                # [1] -> path and [2] -> frame number.
                db_images.extend([
                    models.Image(
                        data=db_data,
                        path=os.path.relpath(data[1], upload_dir),
                        frame=data[2],
                        width=size[0],
                        height=size[1])

                    for data, size in zip(chunk_data, img_sizes)
                ])
            else:
                video_size = img_sizes[0]
                video_path = chunk_data[0][1]

            db_data.size += len(chunk_data)
            progress = extractor.get_progress(chunk_data[-1][2])
            update_progress(progress)

    if db_task.mode == 'annotation':
        models.Image.objects.bulk_create(db_images)
        # The images are read back so that each related file can reference a
        # saved primary image.
        created_images = models.Image.objects.filter(data_id=db_data.id)

        db_related_files = [
            models.RelatedFile(data=image.data, primary_image=image, path=os.path.join(upload_dir, related_file_path))
            for image in created_images
            for related_file_path in related_images.get(image.path, [])
        ]
        models.RelatedFile.objects.bulk_create(db_related_files)
        db_images = []
    else:
        models.Video.objects.create(
            data=db_data,
            path=os.path.relpath(video_path, upload_dir),
            width=video_size[0], height=video_size[1])

    # A stop frame of 0 is replaced by the index of the last selected frame;
    # any other value is capped by that index.
    if db_data.stop_frame == 0:
        db_data.stop_frame = db_data.start_frame + (db_data.size - 1) * db_data.get_frame_step()
    else:
        # validate stop_frame
        db_data.stop_frame = min(db_data.stop_frame, \
            db_data.start_frame + (db_data.size - 1) * db_data.get_frame_step())

    preview = extractor.get_preview()
    preview.save(db_data.get_preview_path())

    slogger.glob.info("Found frames {} for Data #{}".format(db_data.size, db_data.id))
    _save_task_to_db(db_task)
def _create_thread(db_task, data, isBackupRestore=False, isDatasetImport=False):
    """Attach media to a task: validate inputs, build manifest/chunks and DB rows.

    The function reports its status through the current RQ job
    (``rq.get_current_job().meta``) and finishes by calling
    ``_save_task_to_db(db_task)``.

    Args:
        db_task: a task model instance, or an ``int`` primary key, in which
            case the task is loaded with ``select_for_update()``.
        data: dict with the creation parameters. Keys read here:
            ``'remote_files'``, ``'server_files'``, ``'server_files_path'``
            (optional), ``'sorting_method'``, ``'stop_frame'`` and
            ``'use_zip_chunks'``. ``'remote_files'``, ``'stop_frame'`` and
            ``'sorting_method'`` may be overwritten in place.
        isBackupRestore: enables the backup-restore specific checks and the
            re-sorting of media according to the existing manifest.
        isDatasetImport: skips downloading of remote files; together with
            ``isBackupRestore`` it also switches frame indexing for images
            located on the share.

    Raises:
        Exception: for unsupported combinations (manifest without cache,
            missing cloud storage manifest, mixed media types, manifest with
            random/predefined sorting, dimension mismatch inside a project,
            incorrect file-to-manifest mapping).
    """
    # A primary key is accepted instead of a model instance.
    if isinstance(db_task, int):
        db_task = models.Task.objects.select_for_update().get(pk=db_task)

    slogger.glob.info("create task #{}".format(db_task.id))

    db_data = db_task.data
    upload_dir = db_data.get_upload_dirname()

    # Remote files are downloaded into the upload directory (not on dataset import);
    # the 'remote_files' entry is replaced by the result of the download helper.
    if data['remote_files'] and not isDatasetImport:
        data['remote_files'] = _download_data(data['remote_files'], upload_dir)

    # manifest_files is filled by _count_files as an output argument.
    manifest_files = []
    media = _count_files(data, manifest_files)
    media, task_mode = _validate_data(media, manifest_files)

    if data['server_files']:
        if db_data.storage == models.StorageChoice.LOCAL:
            _copy_data_from_source(data['server_files'], upload_dir, data.get('server_files_path'))
        elif db_data.storage == models.StorageChoice.SHARE:
            # Files on the share are used in place: the share root becomes the working directory.
            upload_dir = settings.SHARE_ROOT

    # Directory against which the uploaded manifest file is validated.
    manifest_root = None
    if db_data.storage in { models.StorageChoice.LOCAL, models.StorageChoice.SHARE }:
        manifest_root = upload_dir
    elif db_data.storage == models.StorageChoice.CLOUD_STORAGE:
        manifest_root = db_data.cloud_storage.get_storage_dirname()

    manifest_file = _validate_manifest(manifest_files, manifest_root)
    # A user-supplied manifest is only accepted together with the cache storage method.
    if manifest_file and (not settings.USE_CACHE or db_data.storage_method != models.StorageMethodChoice.CACHE):
        raise Exception(
            "File with meta information can be uploaded if 'Use cache' option is also selected"
        )

    if data['server_files'] and db_data.storage == models.StorageChoice.CLOUD_STORAGE:
        if not manifest_file:
            raise Exception('A manifest file not found')
        db_cloud_storage = db_data.cloud_storage
        credentials = Credentials()
        credentials.convert_from_db({
            'type': db_cloud_storage.credentials_type,
            'value': db_cloud_storage.credentials,
        })

        details = {
            'resource': db_cloud_storage.resource,
            'credentials': credentials,
            'specific_attributes': db_cloud_storage.get_specific_attributes()
        }
        cloud_storage_instance = get_cloud_storage_instance(
            cloud_provider=db_cloud_storage.provider_type, **details)
        # Only the first image (in the requested sorting order) is downloaded here.
        sorted_media = sort(media['image'], data['sorting_method'])
        first_sorted_media_image = sorted_media[0]
        cloud_storage_instance.download_file(
            first_sorted_media_image,
            os.path.join(upload_dir, first_sorted_media_image))

        # prepare task manifest file from cloud storage manifest file
        # NOTE we should create manifest before defining chunk_size
        # FIXME in the future when will be implemented archive support
        manifest = ImageManifestManager(db_data.get_manifest_path())
        cloud_storage_manifest = ImageManifestManager(
            os.path.join(db_data.cloud_storage.get_storage_dirname(), manifest_file),
            db_data.cloud_storage.get_storage_dirname())
        cloud_storage_manifest.set_index()
        sequence, content = cloud_storage_manifest.get_subset(sorted_media)
        # Reorder the manifest entries by their sequence value.
        sorted_content = (i[1] for i in sorted(zip(sequence, content)))
        manifest.create(sorted_content)

    av_scan_paths(upload_dir)

    job = rq.get_current_job()
    job.meta['status'] = 'Media files are being extracted...'
    job.save_meta()

    db_images = []
    extractor = None
    manifest_index = _get_manifest_frame_indexer()

    # If upload from server_files image and directories
    # need to update images list by all found images in directories
    if (data['server_files']) and len(media['directory']) and len(
            media['image']):
        media['image'].extend([
            os.path.relpath(image, upload_dir)
            for image in MEDIA_TYPES['directory']['extractor'](source_path=[
                os.path.join(upload_dir, f) for f in media['directory']
            ], ).absolute_source_paths
        ])
        media['directory'] = []

    # Exactly one non-empty media type is allowed; it defines the extractor.
    for media_type, media_files in media.items():
        if media_files:
            if extractor is not None:
                raise Exception('Combined data types are not supported')
            if (
                    isDatasetImport or isBackupRestore
            ) and media_type == 'image' and db_data.storage == models.StorageChoice.SHARE:
                # The frame range is moved into the manifest indexer and reset on db_data.
                manifest_index = _get_manifest_frame_indexer(
                    db_data.start_frame, db_data.get_frame_step())
                db_data.start_frame = 0
                data['stop_frame'] = None
                db_data.frame_filter = ''
            source_paths = [os.path.join(upload_dir, f) for f in media_files]
            if manifest_file and not isBackupRestore and data[
                    'sorting_method'] in {
                        models.SortingMethod.RANDOM,
                        models.SortingMethod.PREDEFINED
                    }:
                raise Exception(
                    "It isn't supported to upload manifest file and use random sorting"
                )
            if isBackupRestore and db_data.storage_method == models.StorageMethodChoice.FILE_SYSTEM and \
                    data['sorting_method'] in {models.SortingMethod.RANDOM, models.SortingMethod.PREDEFINED}:
                raise Exception(
                    "It isn't supported to import the task that was created without cache but with random/predefined sorting"
                )

            details = {
                'source_path': source_paths,
                'step': db_data.get_frame_step(),
                'start': db_data.start_frame,
                'stop': data['stop_frame'],
            }
            if media_type in {
                    'archive', 'zip', 'pdf'
            } and db_data.storage == models.StorageChoice.SHARE:
                # Archives/PDFs from the share are extracted into the task's own
                # upload directory, after which the data is treated as local.
                details['extract_dir'] = db_data.get_upload_dirname()
                upload_dir = db_data.get_upload_dirname()
                db_data.storage = models.StorageChoice.LOCAL
            if media_type != 'video':
                details['sorting_method'] = data['sorting_method']
            extractor = MEDIA_TYPES[media_type]['extractor'](**details)

    validate_dimension = ValidateDimension()
    if isinstance(extractor, MEDIA_TYPES['zip']['extractor']):
        extractor.extract()

    # Dimension validation runs over the working directory for local data and
    # for zip archives taken from the share.
    if db_data.storage == models.StorageChoice.LOCAL or \
        (db_data.storage == models.StorageChoice.SHARE and \
        isinstance(extractor, MEDIA_TYPES['zip']['extractor'])):
        validate_dimension.set_path(upload_dir)
        validate_dimension.validate()

    # All tasks of a project must share the same dimension.
    if db_task.project is not None and db_task.project.tasks.count(
    ) > 1 and db_task.project.tasks.first(
    ).dimension != validate_dimension.dimension:
        raise Exception(
            f'Dimension ({validate_dimension.dimension}) of the task must be the same as other tasks in project ({db_task.project.tasks.first().dimension})'
        )

    if validate_dimension.dimension == models.DimensionType.DIM_3D:
        db_task.dimension = models.DimensionType.DIM_3D

        keys_of_related_files = validate_dimension.related_files.keys()
        absolute_keys_of_related_files = [
            os.path.join(upload_dir, f) for f in keys_of_related_files
        ]
        # When a task is created, the sorting method can be random and in this case, reinitialization will be with correct sorting
        # but when a task is restored from a backup, a random sorting is changed to predefined and we need to manually sort files
        # in the correct order.
        source_files = absolute_keys_of_related_files if not isBackupRestore else \
            [item for item in extractor.absolute_source_paths if item in absolute_keys_of_related_files]
        extractor.reconcile(
            source_files=source_files,
            step=db_data.get_frame_step(),
            start=db_data.start_frame,
            stop=data['stop_frame'],
            dimension=models.DimensionType.DIM_3D,
        )

    related_images = {}
    if isinstance(extractor, MEDIA_TYPES['image']['extractor']):
        # Paths located inside a 'related_images' directory are excluded from the frames.
        extractor.filter(lambda x: not re.search(
            r'(^|{0})related_images{0}'.format(os.sep), x))
        related_images = detect_related_images(extractor.absolute_source_paths, upload_dir)

    if isBackupRestore and not isinstance(extractor, MEDIA_TYPES['video']['extractor']) and db_data.storage_method == models.StorageMethodChoice.CACHE and \
        db_data.sorting_method in {models.SortingMethod.RANDOM, models.SortingMethod.PREDEFINED} and validate_dimension.dimension != models.DimensionType.DIM_3D:
        # we should sort media_files according to the manifest content sequence
        # and we should do this in general after validation step for 3D data and after filtering from related_images
        manifest = ImageManifestManager(db_data.get_manifest_path())
        manifest.set_index()
        sorted_media_files = []

        for idx in range(len(extractor.absolute_source_paths)):
            properties = manifest[idx]
            image_name = properties.get('name', None)
            image_extension = properties.get('extension', None)

            full_image_path = os.path.join(
                upload_dir, f"{image_name}{image_extension}"
            ) if image_name and image_extension else None
            # Manifest entries without a matching file in the extractor are skipped.
            if full_image_path and full_image_path in extractor:
                sorted_media_files.append(full_image_path)
        media_files = sorted_media_files.copy()
        del sorted_media_files
        data['sorting_method'] = models.SortingMethod.PREDEFINED
        extractor.reconcile(
            source_files=media_files,
            step=db_data.get_frame_step(),
            start=db_data.start_frame,
            stop=data['stop_frame'],
            sorting_method=data['sorting_method'],
        )

    db_task.mode = task_mode
    # Compressed chunks are video only for 'interpolation' tasks without 'use_zip_chunks'.
    db_data.compressed_chunk_type = models.DataChoice.VIDEO if task_mode == 'interpolation' and not data[
        'use_zip_chunks'] else models.DataChoice.IMAGESET
    db_data.original_chunk_type = models.DataChoice.VIDEO if task_mode == 'interpolation' else models.DataChoice.IMAGESET

    def update_progress(progress):
        """Store the compression status and progress in the RQ job meta."""
        progress_animation = '|/-\\'
        # The spinner position is kept as an attribute of the function itself.
        if not hasattr(update_progress, 'call_counter'):
            update_progress.call_counter = 0

        status_message = 'Images are being compressed'
        # Without a (non-zero) progress value a spinner character is appended.
        if not progress:
            status_message = '{} {}'.format(
                status_message,
                progress_animation[update_progress.call_counter])
        job.meta['status'] = status_message
        job.meta['task_progress'] = progress or 0.
        job.save_meta()
        update_progress.call_counter = (update_progress.call_counter +
                                        1) % len(progress_animation)

    compressed_chunk_writer_class = Mpeg4CompressedChunkWriter if db_data.compressed_chunk_type == models.DataChoice.VIDEO else ZipCompressedChunkWriter
    if db_data.original_chunk_type == models.DataChoice.VIDEO:
        original_chunk_writer_class = Mpeg4ChunkWriter
        # Let's use QP=17 (that is 67 for 0-100 range) for the original chunks, which should be visually lossless or nearly so.
        # A lower value will significantly increase the chunk size with a slight increase of quality.
        original_quality = 67
    else:
        original_chunk_writer_class = ZipChunkWriter
        original_quality = 100

    kwargs = {}
    if validate_dimension.dimension == models.DimensionType.DIM_3D:
        kwargs["dimension"] = validate_dimension.dimension
    compressed_chunk_writer = compressed_chunk_writer_class(
        db_data.image_quality, **kwargs)
    original_chunk_writer = original_chunk_writer_class(original_quality)

    # calculate chunk size if it isn't specified
    if db_data.chunk_size is None:
        if isinstance(compressed_chunk_writer, ZipCompressedChunkWriter):
            if not (db_data.storage == models.StorageChoice.CLOUD_STORAGE):
                w, h = extractor.get_image_size(0)
            else:
                # For cloud storage the size comes from the manifest created above.
                img_properties = manifest[0]
                w, h = img_properties['width'], img_properties['height']
            area = h * w
            # 36 frames for a 1920x1080 image, scaled by area and clamped to [2, 72].
            db_data.chunk_size = max(2, min(72, 36 * 1920 * 1080 // area))
        else:
            db_data.chunk_size = 36

    video_path = ""
    video_size = (0, 0)

    def _update_status(msg):
        """Write a free-form status message into the RQ job meta."""
        job.meta['status'] = msg
        job.save_meta()

    # Cache storage method: only a manifest and DB records are prepared here,
    # the chunk writers are not called in this branch.
    if settings.USE_CACHE and db_data.storage_method == models.StorageMethodChoice.CACHE:
        for media_type, media_files in media.items():

            if not media_files:
                continue

            # replace manifest file (e.g was uploaded 'subdir/manifest.jsonl' or 'some_manifest.jsonl')
            if manifest_file and not os.path.exists(
                    db_data.get_manifest_path()):
                shutil.copyfile(os.path.join(upload_dir, manifest_file),
                                db_data.get_manifest_path())
                # The uploaded copy is removed unless it lives on the share.
                if upload_dir != settings.SHARE_ROOT:
                    os.remove(os.path.join(upload_dir, manifest_file))

            if task_mode == MEDIA_TYPES['video']['mode']:
                try:
                    manifest_is_prepared = False
                    if manifest_file:
                        try:
                            manifest = VideoManifestValidator(
                                source_path=os.path.join(
                                    upload_dir, media_files[0]),
                                manifest_path=db_data.get_manifest_path())
                            manifest.init_index()
                            manifest.validate_seek_key_frames()
                            manifest.validate_frame_numbers()
                            # NOTE(review): assert is used as validation here; it is
                            # skipped when Python runs with -O.
                            assert len(manifest) > 0, 'No key frames.'

                            all_frames = manifest.video_length
                            video_size = manifest.video_resolution
                            manifest_is_prepared = True
                        except Exception as ex:
                            # NOTE(review): if the VideoManifestValidator constructor itself
                            # raised, `manifest` may be unbound (or refer to an earlier
                            # object) at this point - confirm.
                            manifest.remove()
                            if isinstance(ex, AssertionError):
                                base_msg = str(ex)
                            else:
                                base_msg = 'Invalid manifest file was upload.'
                                slogger.glob.warning(str(ex))
                            _update_status(
                                '{} Start prepare a valid manifest file.'.
                                format(base_msg))

                    # No usable uploaded manifest: build one from the video itself.
                    if not manifest_is_prepared:
                        _update_status('Start prepare a manifest file')
                        manifest = VideoManifestManager(
                            db_data.get_manifest_path())
                        manifest.link(media_file=media_files[0],
                                      upload_dir=upload_dir,
                                      chunk_size=db_data.chunk_size)
                        manifest.create()
                        _update_status('A manifest had been created')

                        all_frames = len(manifest.reader)
                        video_size = manifest.reader.resolution
                        manifest_is_prepared = True

                    # Number of frames in [start_frame, stop_frame] (stop inclusive,
                    # capped by the video length) taken with the frame step.
                    db_data.size = len(range(db_data.start_frame, min(data['stop_frame'] + 1 \
                        if data['stop_frame'] else all_frames, all_frames), db_data.get_frame_step()))
                    video_path = os.path.join(upload_dir, media_files[0])
                except Exception as ex:
                    # Fall back to the file-system storage method; the chunk-writing
                    # branch below is then executed for this task.
                    db_data.storage_method = models.StorageMethodChoice.FILE_SYSTEM
                    # NOTE(review): same possible unbound `manifest` as above - confirm.
                    manifest.remove()
                    del manifest
                    base_msg = str(ex) if isinstance(ex, AssertionError) \
                        else "Uploaded video does not support a quick way of task creating."
                    _update_status(
                        "{} The task will be created using the old method".
                        format(base_msg))
            else: # images, archive, pdf
                db_data.size = len(extractor)
                manifest = ImageManifestManager(db_data.get_manifest_path())
                if not manifest_file:
                    manifest.link(
                        sources=extractor.absolute_source_paths,
                        meta={
                            k: {
                                'related_images': related_images[k]
                            }
                            for k in related_images
                        },
                        data_dir=upload_dir,
                        DIM_3D=(
                            db_task.dimension == models.DimensionType.DIM_3D),
                    )
                    manifest.create()
                else:
                    manifest.init_index()
                # Frames are grouped into consecutive runs of chunk_size items.
                counter = itertools.count()
                for _, chunk_frames in itertools.groupby(
                        extractor.frame_range,
                        lambda x: next(counter) // db_data.chunk_size):
                    chunk_paths = [(extractor.get_path(i), i)
                                   for i in chunk_frames]
                    img_sizes = []

                    for chunk_path, frame_id in chunk_paths:
                        properties = manifest[manifest_index(frame_id)]

                        # check mapping
                        if not chunk_path.endswith(
                                f"{properties['name']}{properties['extension']}"
                        ):
                            raise Exception(
                                'Incorrect file mapping to manifest content')
                        # 2D sizes come from the manifest, otherwise from the extractor.
                        if db_task.dimension == models.DimensionType.DIM_2D:
                            resolution = (properties['width'],
                                          properties['height'])
                        else:
                            resolution = extractor.get_image_size(frame_id)
                        img_sizes.append(resolution)

                    db_images.extend([
                        models.Image(data=db_data,
                                     path=os.path.relpath(path, upload_dir),
                                     frame=frame,
                                     width=w,
                                     height=h)
                        for (path, frame), (w, h) in zip(chunk_paths, img_sizes)
                    ])

    # File-system storage method (also the fallback from the cache branch):
    # every chunk is written in original and compressed form.
    if db_data.storage_method == models.StorageMethodChoice.FILE_SYSTEM or not settings.USE_CACHE:
        counter = itertools.count()
        generator = itertools.groupby(
            extractor, lambda x: next(counter) // db_data.chunk_size)
        for chunk_idx, chunk_data in generator:
            chunk_data = list(chunk_data)
            original_chunk_path = db_data.get_original_chunk_path(chunk_idx)
            original_chunk_writer.save_as_chunk(chunk_data, original_chunk_path)

            compressed_chunk_path = db_data.get_compressed_chunk_path(
                chunk_idx)
            img_sizes = compressed_chunk_writer.save_as_chunk(
                chunk_data, compressed_chunk_path)

            if db_task.mode == 'annotation':
                # Items of chunk_data are indexed as [1] -> path, [2] -> frame number.
                db_images.extend([
                    models.Image(data=db_data,
                                 path=os.path.relpath(data[1], upload_dir),
                                 frame=data[2],
                                 width=size[0],
                                 height=size[1])
                    for data, size in zip(chunk_data, img_sizes)
                ])
            else:
                video_size = img_sizes[0]
                video_path = chunk_data[0][1]

            db_data.size += len(chunk_data)
            progress = extractor.get_progress(chunk_data[-1][2])
            update_progress(progress)

    if db_task.mode == 'annotation':
        models.Image.objects.bulk_create(db_images)
        created_images = models.Image.objects.filter(data_id=db_data.id)

        # One RelatedFile row per (image, related file path) pair.
        db_related_files = [
            models.RelatedFile(data=image.data,
                               primary_image=image,
                               path=os.path.join(upload_dir, related_file_path))
            for image in created_images
            for related_file_path in related_images.get(image.path, [])
        ]
        models.RelatedFile.objects.bulk_create(db_related_files)
        db_images = []
    else:
        models.Video.objects.create(data=db_data,
                                    path=os.path.relpath(
                                        video_path, upload_dir),
                                    width=video_size[0],
                                    height=video_size[1])

    # stop_frame == 0 means "not specified": derive it from the number of frames.
    if db_data.stop_frame == 0:
        db_data.stop_frame = db_data.start_frame + (
            db_data.size - 1) * db_data.get_frame_step()
    else:
        # validate stop_frame
        db_data.stop_frame = min(db_data.stop_frame, \
            db_data.start_frame + (db_data.size - 1) * db_data.get_frame_step())

    preview = extractor.get_preview()
    preview.save(db_data.get_preview_path())

    slogger.glob.info("Found frames {} for Data #{}".format(
        db_data.size, db_data.id))
    _save_task_to_db(db_task)
def _create_thread(tid, data):
    """Fill task ``tid`` with media: write chunks, create DB rows and a preview.

    Args:
        tid: primary key of the task; the task is loaded with
            ``select_for_update()``.
        data: dict with the creation parameters. Keys read here:
            ``'remote_files'`` (replaced by the download result),
            ``'server_files'``, ``'stop_frame'`` and ``'use_zip_chunks'``.

    Raises:
        NotImplementedError: the task data already has a non-zero size.
        Exception: more than one media type is present.
    """
    slogger.glob.info("create task #{}".format(tid))

    db_task = models.Task.objects.select_for_update().get(pk=tid)
    db_data = db_task.data
    if db_task.data.size != 0:
        raise NotImplementedError("Adding more data is not implemented")

    upload_dir = db_data.get_upload_dirname()

    # Remote files are downloaded first; the entry is replaced by the helper's result.
    remote_files = data['remote_files']
    if remote_files:
        data['remote_files'] = _download_data(remote_files, upload_dir)

    media, task_mode = _validate_data(_count_files(data))

    server_files = data['server_files']
    if server_files:
        _copy_data_from_share(server_files, upload_dir)

    av_scan_paths(upload_dir)

    job = rq.get_current_job()
    job.meta['status'] = 'Media files are being extracted...'
    job.save_meta()

    db_images = []
    extractor = None

    # Exactly one non-empty media type is allowed; it defines the extractor.
    for media_type, media_files in media.items():
        if not media_files:
            continue
        if extractor is not None:
            raise Exception('Combined data types are not supported')
        extractor_class = MEDIA_TYPES[media_type]['extractor']
        extractor = extractor_class(
            source_path=[os.path.join(upload_dir, name) for name in media_files],
            step=db_data.get_frame_step(),
            start=db_data.start_frame,
            stop=data['stop_frame'],
        )

    db_task.mode = task_mode
    # Compressed chunks are video only for 'interpolation' tasks without 'use_zip_chunks'.
    if task_mode == 'interpolation' and not data['use_zip_chunks']:
        db_data.compressed_chunk_type = models.DataChoice.VIDEO
    else:
        db_data.compressed_chunk_type = models.DataChoice.IMAGESET
    if task_mode == 'interpolation':
        db_data.original_chunk_type = models.DataChoice.VIDEO
    else:
        db_data.original_chunk_type = models.DataChoice.IMAGESET

    def update_progress(progress):
        """Publish the compression status (percentage or spinner) in the job meta."""
        spinner = '|/-\\'
        # The spinner position is stored as an attribute of this function.
        tick = getattr(update_progress, 'call_counter', 0)

        if progress:
            shown = '{}%'.format(round(progress * 100))
        else:
            shown = '{}'.format(spinner[tick])
        job.meta['status'] = 'Images are being compressed {}'.format(shown)
        job.save_meta()
        update_progress.call_counter = (tick + 1) % len(spinner)

    if db_data.compressed_chunk_type == DataChoice.VIDEO:
        compressed_chunk_writer_class = Mpeg4CompressedChunkWriter
    else:
        compressed_chunk_writer_class = ZipCompressedChunkWriter
    if db_data.original_chunk_type == DataChoice.VIDEO:
        original_chunk_writer_class = Mpeg4ChunkWriter
    else:
        original_chunk_writer_class = ZipChunkWriter

    compressed_chunk_writer = compressed_chunk_writer_class(db_data.image_quality)
    original_chunk_writer = original_chunk_writer_class(100)

    # Pick a chunk size when none was given: 36 frames for a 1920x1080 image,
    # scaled by the image area and clamped to [2, 72]; plain 36 for video chunks.
    if db_data.chunk_size is None:
        chosen_chunk_size = 36
        if isinstance(compressed_chunk_writer, ZipCompressedChunkWriter):
            width, height = extractor.get_image_size()
            pixels = height * width
            chosen_chunk_size = max(2, min(72, 36 * 1920 * 1080 // pixels))
        db_data.chunk_size = chosen_chunk_size

    video_path = ""
    video_size = (0, 0)

    frame_counter = itertools.count()

    def chunk_number(_item):
        # Consecutive items share a key until chunk_size of them were seen.
        return next(frame_counter) // db_data.chunk_size

    for chunk_idx, chunk_items in itertools.groupby(extractor, chunk_number):
        chunk_data = list(chunk_items)

        original_chunk_writer.save_as_chunk(
            chunk_data, db_data.get_original_chunk_path(chunk_idx))
        img_sizes = compressed_chunk_writer.save_as_chunk(
            chunk_data, db_data.get_compressed_chunk_path(chunk_idx))

        if db_task.mode != 'annotation':
            video_size = img_sizes[0]
            video_path = chunk_data[0][1]
        else:
            # Items of chunk_data are indexed as [1] -> path, [2] -> frame number.
            db_images.extend(
                models.Image(
                    data=db_data,
                    path=os.path.relpath(item[1], upload_dir),
                    frame=item[2],
                    width=size[0],
                    height=size[1],
                )
                for item, size in zip(chunk_data, img_sizes)
            )

        db_data.size += len(chunk_data)
        update_progress(extractor.get_progress(chunk_data[-1][2]))

    if db_task.mode == 'annotation':
        models.Image.objects.bulk_create(db_images)
        db_images = []
    else:
        models.Video.objects.create(
            data=db_data,
            path=os.path.relpath(video_path, upload_dir),
            width=video_size[0],
            height=video_size[1],
        )

    # stop_frame == 0 means "not specified": derive it from the number of frames.
    if db_data.stop_frame == 0:
        last_offset = (db_data.size - 1) * db_data.get_frame_step()
        db_data.stop_frame = db_data.start_frame + last_offset

    extractor.get_preview().save(db_data.get_preview_path())

    slogger.glob.info("Founded frames {} for Data #{}".format(
        db_data.size, db_data.id))
    _save_task_to_db(db_task)
def _create_thread(tid, data):
    """Fill task ``tid`` with media using either the cache or the file system.

    With ``settings.USE_CACHE`` and the CACHE storage method only meta
    information, dummy chunk files and DB records are prepared; otherwise (or
    when the video cannot be prepared for the cache) every chunk is written in
    original and compressed form.

    Args:
        tid: primary key of the task; the task is loaded with
            ``select_for_update()``.
        data: dict with the creation parameters. Keys read here:
            ``'remote_files'`` (replaced by the download result),
            ``'server_files'``, ``'stop_frame'`` and ``'use_zip_chunks'``.

    Raises:
        NotImplementedError: the task data already has a non-zero size.
        Exception: more than one media type is present.
    """
    slogger.glob.info("create task #{}".format(tid))

    db_task = models.Task.objects.select_for_update().get(pk=tid)
    db_data = db_task.data
    if db_task.data.size != 0:
        raise NotImplementedError("Adding more data is not implemented")

    upload_dir = db_data.get_upload_dirname()

    if data['remote_files']:
        data['remote_files'] = _download_data(data['remote_files'], upload_dir)

    media = _count_files(data)
    media, task_mode = _validate_data(media)

    if data['server_files']:
        _copy_data_from_share(data['server_files'], upload_dir)

    av_scan_paths(upload_dir)

    job = rq.get_current_job()
    job.meta['status'] = 'Media files are being extracted...'
    job.save_meta()

    db_images = []
    extractor = None

    # Exactly one non-empty media type is allowed; it defines the extractor.
    for media_type, media_files in media.items():
        if media_files:
            if extractor is not None:
                raise Exception('Combined data types are not supported')
            extractor = MEDIA_TYPES[media_type]['extractor'](
                source_path=[os.path.join(upload_dir, f) for f in media_files],
                step=db_data.get_frame_step(),
                start=db_data.start_frame,
                stop=data['stop_frame'],
            )
    if extractor.__class__ == MEDIA_TYPES['zip']['extractor']:
        extractor.extract()
    db_task.mode = task_mode
    # Compressed chunks are video only for 'interpolation' tasks without 'use_zip_chunks'.
    db_data.compressed_chunk_type = models.DataChoice.VIDEO if task_mode == 'interpolation' and not data['use_zip_chunks'] else models.DataChoice.IMAGESET
    db_data.original_chunk_type = models.DataChoice.VIDEO if task_mode == 'interpolation' else models.DataChoice.IMAGESET

    def update_progress(progress):
        """Publish the compression status (percentage or spinner) in the job meta."""
        progress_animation = '|/-\\'
        # The spinner position is stored as an attribute of this function.
        if not hasattr(update_progress, 'call_counter'):
            update_progress.call_counter = 0

        status_template = 'Images are being compressed {}'
        if progress:
            current_progress = '{}%'.format(round(progress * 100))
        else:
            current_progress = '{}'.format(progress_animation[update_progress.call_counter])
        job.meta['status'] = status_template.format(current_progress)
        job.save_meta()
        update_progress.call_counter = (update_progress.call_counter + 1) % len(progress_animation)

    compressed_chunk_writer_class = Mpeg4CompressedChunkWriter if db_data.compressed_chunk_type == DataChoice.VIDEO else ZipCompressedChunkWriter
    original_chunk_writer_class = Mpeg4ChunkWriter if db_data.original_chunk_type == DataChoice.VIDEO else ZipChunkWriter

    compressed_chunk_writer = compressed_chunk_writer_class(db_data.image_quality)
    original_chunk_writer = original_chunk_writer_class(100)

    # calculate chunk size if it isn't specified: 36 frames for a 1920x1080
    # image, scaled by the image area and clamped to [2, 72]
    if db_data.chunk_size is None:
        if isinstance(compressed_chunk_writer, ZipCompressedChunkWriter):
            w, h = extractor.get_image_size()
            area = h * w
            db_data.chunk_size = max(2, min(72, 36 * 1920 * 1080 // area))
        else:
            db_data.chunk_size = 36

    video_path = ""
    video_size = (0, 0)

    if settings.USE_CACHE and db_data.storage_method == StorageMethodChoice.CACHE:
        for media_type, media_files in media.items():
            if not media_files:
                continue

            if task_mode == MEDIA_TYPES['video']['mode']:
                video_file = os.path.join(upload_dir, media_files[0])
                try:
                    analyzer = AnalyzeVideo(source_path=video_file)
                    analyzer.check_type_first_frame()
                    analyzer.check_video_timestamps_sequences()

                    meta_info = PrepareInfo(
                        source_path=video_file,
                        meta_path=os.path.join(upload_dir, 'meta_info.txt'))
                    meta_info.save_key_frames()
                    meta_info.check_seek_key_frames()
                    meta_info.save_meta_info()

                    all_frames = meta_info.get_task_size()
                    # stop_frame is inclusive and capped by the video length.
                    stop = data['stop_frame'] + 1 if data['stop_frame'] else all_frames
                    frame_count = len(range(
                        db_data.start_frame,
                        min(stop, all_frames),
                        db_data.get_frame_step()))
                    first_key_frame = meta_info.key_frames.get(
                        next(iter(meta_info.key_frames)))
                    first_frame_size = (first_key_frame.width, first_key_frame.height)
                except Exception as ex:
                    # The quick way is not possible for this video: fall back to
                    # the file-system method handled below. Nothing has been
                    # written to db_data.size yet, so the fallback loop starts
                    # counting frames from a clean state.
                    slogger.glob.warning(
                        "Meta information cannot be prepared for task #{}, "
                        "the file system storage method will be used: {}".format(tid, ex))
                    db_data.storage_method = StorageMethodChoice.FILE_SYSTEM
                else:
                    # Commit the results only after every step succeeded.
                    db_data.size = frame_count
                    video_path = video_file
                    video_size = first_frame_size

            else:  # images, archive
                # Local import: PIL is only needed for this branch.
                from PIL import Image

                counter_ = itertools.count()
                if isinstance(extractor, MEDIA_TYPES['archive']['extractor']):
                    media_files = [os.path.relpath(path, upload_dir) for path in extractor._source_path]
                elif isinstance(extractor, (MEDIA_TYPES['zip']['extractor'], MEDIA_TYPES['pdf']['extractor'])):
                    media_files = extractor._source_path

                numbers_sequence = range(
                    db_data.start_frame,
                    min(data['stop_frame'] if data['stop_frame'] else len(media_files), len(media_files)),
                    db_data.get_frame_step())
                # (path, frame number) pairs of the selected frames, in sorted path order.
                m_paths = [(path, numb) for numb, path in enumerate(sorted(media_files)) if numb in numbers_sequence]

                for chunk_number, media_paths in itertools.groupby(m_paths, lambda x: next(counter_) // db_data.chunk_size):
                    media_paths = list(media_paths)
                    img_sizes = []
                    # A dummy chunk is a text file listing the paths of its frames.
                    with open(db_data.get_dummy_chunk_path(chunk_number), 'w') as dummy_chunk:
                        for path, _ in media_paths:
                            dummy_chunk.write(path + '\n')
                            # Close every image right after reading its size so
                            # file handles do not pile up on large tasks.
                            with Image.open(os.path.join(upload_dir, path)) as image:
                                img_sizes.append(image.size)

                    db_data.size += len(media_paths)
                    db_images.extend([
                        models.Image(
                            data=db_data,
                            path=data[0],
                            frame=data[1],
                            width=size[0],
                            height=size[1])
                        for data, size in zip(media_paths, img_sizes)
                    ])

    # File-system storage method (also the fallback from the cache branch):
    # every chunk is written in original and compressed form.
    if db_data.storage_method == StorageMethodChoice.FILE_SYSTEM or not settings.USE_CACHE:
        counter = itertools.count()
        generator = itertools.groupby(extractor, lambda x: next(counter) // db_data.chunk_size)
        for chunk_idx, chunk_data in generator:
            chunk_data = list(chunk_data)
            original_chunk_path = db_data.get_original_chunk_path(chunk_idx)
            original_chunk_writer.save_as_chunk(chunk_data, original_chunk_path)

            compressed_chunk_path = db_data.get_compressed_chunk_path(chunk_idx)
            img_sizes = compressed_chunk_writer.save_as_chunk(chunk_data, compressed_chunk_path)

            if db_task.mode == 'annotation':
                # Items of chunk_data are indexed as [1] -> path, [2] -> frame number.
                db_images.extend([
                    models.Image(
                        data=db_data,
                        path=os.path.relpath(data[1], upload_dir),
                        frame=data[2],
                        width=size[0],
                        height=size[1])
                    for data, size in zip(chunk_data, img_sizes)
                ])
            else:
                video_size = img_sizes[0]
                video_path = chunk_data[0][1]

            db_data.size += len(chunk_data)
            progress = extractor.get_progress(chunk_data[-1][2])
            update_progress(progress)

    if db_task.mode == 'annotation':
        models.Image.objects.bulk_create(db_images)
        db_images = []
    else:
        models.Video.objects.create(
            data=db_data,
            path=os.path.relpath(video_path, upload_dir),
            width=video_size[0], height=video_size[1])

    # stop_frame == 0 means "not specified": derive it from the number of frames.
    if db_data.stop_frame == 0:
        db_data.stop_frame = db_data.start_frame + (db_data.size - 1) * db_data.get_frame_step()

    preview = extractor.get_preview()
    preview.save(db_data.get_preview_path())

    slogger.glob.info("Founded frames {} for Data #{}".format(db_data.size, db_data.id))
    _save_task_to_db(db_task)
def create_or_update(dl_model_id, name, model_file, weights_file, labelmap_file, interpretation_file, owner, storage, is_shared):
    """Schedule creation or update of a DL model and return the RQ job id.

    Args:
        dl_model_id: id of the model to update, or ``None`` to create a new
            (empty) model first.
        name: passed through to the background job.
        model_file, weights_file, labelmap_file, interpretation_file: when
            ``storage`` is ``"local"``, uploaded objects providing
            ``chunks()`` that are saved to temporary files; otherwise paths
            relative to ``settings.SHARE_ROOT``. Falsy values are allowed.
        owner: user used for the admin-role check; when falsy, the owner of
            the stored model is used instead.
        storage: ``"local"`` for uploaded files, anything else for the share.
        is_shared: passed through to the background job.

    Returns:
        The id of the enqueued job (``"auto_annotation.create.<model id>"``).

    Raises:
        Exception: a share path contains ``..`` or resolves outside of the
            share root.
    """
    def get_abs_path(share_path):
        """Resolve a share-relative path and make sure it stays inside the share."""
        if not share_path:
            return share_path
        # Normalise the root so the comparison below does not depend on a
        # trailing slash or other spelling differences in the setting.
        share_root = os.path.abspath(settings.SHARE_ROOT)
        relpath = os.path.normpath(share_path).lstrip('/')
        if '..' in relpath.split(os.path.sep):
            raise Exception('Permission denied')
        abspath = os.path.abspath(os.path.join(share_root, relpath))
        # commonpath compares whole path components; commonprefix works
        # character by character and would accept e.g. "/share_evil" for
        # the root "/share".
        try:
            inside_share = os.path.commonpath([share_root, abspath]) == share_root
        except ValueError:
            # Raised e.g. for paths on different drives.
            inside_share = False
        if not inside_share:
            raise Exception('Bad file path on share: ' + abspath)
        return abspath

    def save_file_as_tmp(data):
        """Copy an uploaded file into a fresh temporary file and return its name."""
        if not data:
            return None
        fd, filename = tempfile.mkstemp()
        try:
            # Wrap the descriptor returned by mkstemp instead of opening the
            # file a second time; the descriptor is closed even on errors.
            with os.fdopen(fd, 'wb') as tmp_file:
                for chunk in data.chunks():
                    tmp_file.write(chunk)
        except BaseException:
            # Do not leave a partially written temporary file behind.
            os.remove(filename)
            raise
        return filename

    is_create_request = dl_model_id is None
    if is_create_request:
        dl_model_id = create_empty(owner=owner)

    # Evaluated before the arguments are replaced by resolved paths.
    run_tests = bool(model_file or weights_file or labelmap_file or interpretation_file)

    resolve = save_file_as_tmp if storage == "local" else get_abs_path
    model_file = resolve(model_file)
    weights_file = resolve(weights_file)
    labelmap_file = resolve(labelmap_file)
    interpretation_file = resolve(interpretation_file)

    # Only the files that were actually provided are scanned.
    files_to_scan = [
        path
        for path in (model_file, weights_file, labelmap_file, interpretation_file)
        if path
    ]
    av_scan_paths(*files_to_scan)

    if owner:
        restricted = not has_admin_role(owner)
    else:
        restricted = not has_admin_role(AnnotationModel.objects.get(pk=dl_model_id).owner)

    rq_id = "auto_annotation.create.{}".format(dl_model_id)
    queue = django_rq.get_queue("default")
    queue.enqueue_call(
        func=_update_dl_model_thread,
        args=(
            dl_model_id,
            name,
            is_shared,
            model_file,
            weights_file,
            labelmap_file,
            interpretation_file,
            run_tests,
            storage == "local",
            is_create_request,
            restricted,
        ),
        job_id=rq_id,
    )

    return rq_id
def _import_project(filename, user, org_id):
    """Scan ``filename``, import a project from it and return the project id.

    The file is passed to ``av_scan_paths`` before ``ProjectImporter`` is
    created with ``filename``, ``user`` and ``org_id``.
    """
    av_scan_paths(filename)
    return ProjectImporter(filename, user, org_id).import_project().id