def fill_task_meta_data_forward(apps, schema_editor):
    db_alias = schema_editor.connection.alias
    task_model = apps.get_model('engine', 'Task')
    video_model = apps.get_model('engine', 'Video')
    image_model = apps.get_model('engine', 'Image')

    for db_task in task_model.objects.all():
        if db_task.mode == 'interpolation':
            db_video = video_model()
            db_video.task_id = db_task.id
            db_video.start_frame = 0
            db_video.stop_frame = db_task.size
            db_video.step = 1

            video = ""
            for root, _, files in os.walk(_get_upload_dirname(db_task)):
                fullnames = map(lambda f: os.path.join(root, f), files)
                videos = list(filter(lambda x: get_mime(x) == 'video', fullnames))
                if len(videos):
                    video = videos[0]
                    break
            db_video.path = video

            try:
                image = Image.open(_get_frame_path(db_task, 0))
                db_video.width = image.width
                db_video.height = image.height
                image.close()
            except FileNotFoundError:
                db_video.width = 0
                db_video.height = 0

            db_video.save()
        else:
            filenames = []
            for root, _, files in os.walk(_get_upload_dirname(db_task)):
                fullnames = map(lambda f: os.path.join(root, f), files)
                images = filter(lambda x: get_mime(x) == 'image', fullnames)
                filenames.extend(images)
            filenames.sort()

            db_images = []
            for i, image_path in enumerate(filenames):
                db_image = image_model()
                db_image.task_id = db_task.id
                db_image.path = image_path
                db_image.frame = i

                try:
                    image = Image.open(image_path)
                    db_image.width = image.width
                    db_image.height = image.height
                    image.close()
                except FileNotFoundError:
                    db_image.width = 0
                    db_image.height = 0

                db_images.append(db_image)
            image_model.objects.using(db_alias).bulk_create(db_images)
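fill_task_meta_data_forward only makes sense as the forward step of a Django data migration. Below is a minimal sketch of that wiring; the dependency name and the no-op reverse step are assumptions for illustration and are not taken from the source.

from django.db import migrations

def fill_task_meta_data_backward(apps, schema_editor):
    # assumed no-op reverse step: the generated Video/Image rows are simply
    # left in place when rolling the migration back
    pass

class Migration(migrations.Migration):
    dependencies = [
        ('engine', '0001_initial'),  # hypothetical previous migration
    ]
    operations = [
        migrations.RunPython(fill_task_meta_data_forward,
                             fill_task_meta_data_backward),
    ]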
def make_image_meta_cache(db_task):
    with open(db_task.get_image_meta_cache_path(), 'w') as meta_file:
        cache = {
            'original_size': []
        }

        if db_task.mode == 'interpolation':
            image = Image.open(db_task.get_frame_path(0))
            cache['original_size'].append({
                'width': image.size[0],
                'height': image.size[1]
            })
            image.close()
        else:
            filenames = []
            for root, _, files in os.walk(db_task.get_upload_dirname()):
                fullnames = map(lambda f: os.path.join(root, f), files)
                images = filter(lambda x: get_mime(x) == 'image', fullnames)
                filenames.extend(images)
            filenames.sort()

            for image_path in filenames:
                image = Image.open(image_path)
                cache['original_size'].append({
                    'width': image.size[0],
                    'height': image.size[1]
                })
                image.close()

        meta_file.write(str(cache))
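Note that the cache is serialized with str(cache), i.e. as a Python literal rather than JSON. A hedged sketch of how such a file can be read back with the standard library follows; the reader function is an assumption for illustration, not the project's actual loader.

import ast

def read_image_meta_cache(path):
    # the file contains a repr() of a dict, e.g. "{'original_size': [...]}",
    # so ast.literal_eval is the safe way to parse it back
    with open(path, 'r') as meta_file:
        return ast.literal_eval(meta_file.read())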
def count_files(file_mapping, counter):
    for rel_path, full_path in file_mapping.items():
        mime = get_mime(full_path)
        if mime in counter:
            counter[mime].append(rel_path)
        else:
            slogger.glob.warn("Skip '{}' file (its mime type doesn't "
                "correspond to a video or an image file)".format(full_path))
def count_files(file_mapping, counter):
    for rel_path, full_path in file_mapping.items():
        mime = get_mime(full_path)
        if mime in counter:
            counter[mime].append(rel_path)
        elif rel_path.endswith('.jsonl'):
            # NOTE: 'manifest_files' is not defined in this snippet; it is
            # presumably a list captured from an enclosing scope
            manifest_files.append(rel_path)
        else:
            slogger.glob.warn(
                "Skip '{}' file (its mime type doesn't "
                "correspond to supported MIME file type)".format(full_path))
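Since manifest_files is undefined inside this variant, the likely arrangement is a closure over a list owned by an enclosing function. A toy, self-contained sketch of that pattern follows; all names here are illustrative, not from the source.

def make_file_counter(manifest_files):
    # the inner function appends .jsonl paths to the list captured from the
    # enclosing scope instead of receiving it as a parameter
    def count_files(file_mapping, counter):
        for rel_path, full_path in file_mapping.items():
            if rel_path.endswith('.jsonl'):
                manifest_files.append(rel_path)
            else:
                counter.setdefault('image', []).append(rel_path)
    return count_files

manifests, counter = [], {}
make_file_counter(manifests)({'a.jsonl': '/data/a.jsonl', 'b.png': '/data/b.png'}, counter)
assert manifests == ['a.jsonl'] and counter == {'image': ['b.png']}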
def migrate2manifest(apps, schema_editor):
    logger = get_logger(MIGRATION_NAME, MIGRATION_LOG)
    logger.info('The data migration has been started for creating manifest files')
    query_set = _get_query_set(apps)
    logger.info('Need to update {} data objects'.format(len(query_set)))
    for db_data in query_set:
        try:
            upload_dir = '{}/{}/raw'.format(settings.MEDIA_DATA_ROOT, db_data.id)
            logger.info('Migrate data({}), folder - {}'.format(db_data.id, upload_dir))
            if os.path.exists(os.path.join(upload_dir, 'meta_info.txt')):
                os.remove(os.path.join(upload_dir, 'meta_info.txt'))
                logger.info('{}/meta_info.txt has been deleted'.format(upload_dir))
            else:
                for path in glob.glob(f'{upload_dir}/dummy_*.txt'):
                    os.remove(path)
                    logger.info(f'{path} has been deleted')

            # necessary for the case of a long-running data migration
            if os.path.exists(os.path.join(upload_dir, 'manifest.jsonl')):
                logger.info('Manifest file already exists')
                continue

            data_dir = upload_dir if db_data.storage == StorageChoice.LOCAL else settings.SHARE_ROOT
            if hasattr(db_data, 'video'):
                media_file = os.path.join(data_dir, db_data.video.path)
                manifest = VideoManifestManager(manifest_path=upload_dir)
                logger.info('Preparing video meta information has begun')
                meta_info = manifest.prepare_meta(media_file=media_file, force=True)
                logger.info('Manifest creating has begun')
                manifest.create(meta_info)
                logger.info('Index creating has begun')
                manifest.init_index()
            else:
                manifest = ImageManifestManager(manifest_path=upload_dir)
                sources = []
                if db_data.storage == StorageChoice.LOCAL:
                    for (root, _, files) in os.walk(data_dir):
                        sources.extend([
                            os.path.join(root, f) for f in files
                            if get_mime(f) == 'image'
                        ])
                    sources.sort()
                else:
                    # data is on the share, so the directory structure cannot be
                    # restored by walking local storage; take the paths from the DB
                    sources = [
                        os.path.join(data_dir, db_image.path)
                        for db_image in db_data.images.all().order_by('frame')
                    ]

                if any(db_task.dimension == DimensionType.DIM_3D for db_task in db_data.tasks.all()):
                    logger.info('Preparing 3D images meta information has begun')
                    content = []
                    for source in sources:
                        name, ext = os.path.splitext(os.path.relpath(source, upload_dir))
                        content.append({'name': name, 'extension': ext})
                else:
                    logger.info('Preparing 2D images meta information has begun')
                    meta_info = manifest.prepare_meta(sources=sources, data_dir=data_dir)
                    content = meta_info.content

                if db_data.storage == StorageChoice.SHARE:
                    def _get_frame_step(str_):
                        match = search(r'step\s*=\s*([1-9]\d*)', str_)
                        return int(match.group(1)) if match else 1

                    logger.info('Data is located on the share, metadata update has been started')
                    step = _get_frame_step(db_data.frame_filter)
                    start = db_data.start_frame
                    stop = db_data.stop_frame + 1
                    images_range = range(start, stop, step)
                    result_content = []
                    for i in range(stop):
                        item = content.pop(0) if i in images_range else dict()
                        result_content.append(item)
                    content = result_content

                logger.info('Manifest creating has begun')
                manifest.create(content)
                logger.info('Index creating has begun')
                manifest.init_index()
            logger.info('Successful migration for the data({})'.format(db_data.id))
        except Exception as ex:
            logger.error(str(ex))
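The nested _get_frame_step helper pulls the sampling step out of a frame_filter string such as 'step=3'. A standalone, standard-library-only illustration of the same parsing (the function name here is illustrative):

from re import search

def parse_frame_step(frame_filter):
    # "step=3" -> 3; empty string or no match -> default step of 1
    match = search(r'step\s*=\s*([1-9]\d*)', frame_filter or '')
    return int(match.group(1)) if match else 1

assert parse_frame_step('step=3') == 3
assert parse_frame_step('') == 1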
def migrate_task_data(db_task_id, db_data_id, original_video, original_images, size,
                      start_frame, stop_frame, frame_filter, image_quality, chunk_size,
                      return_dict):
    try:
        db_data_dir = os.path.join(settings.MEDIA_DATA_ROOT, str(db_data_id))
        compressed_cache_dir = os.path.join(db_data_dir, 'compressed')
        original_cache_dir = os.path.join(db_data_dir, 'original')
        old_db_task_dir = os.path.join(settings.DATA_ROOT, str(db_task_id))
        old_task_data_dir = os.path.join(old_db_task_dir, 'data')

        if os.path.exists(old_task_data_dir) and size != 0:
            if original_video:
                if os.path.exists(original_video):
                    _stop_frame = stop_frame if stop_frame else None
                    reader = VideoReader([original_video], get_frame_step(frame_filter),
                        start_frame, _stop_frame)

                    original_chunk_writer = Mpeg4ChunkWriter(100)
                    compressed_chunk_writer = ZipCompressedChunkWriter(image_quality)

                    counter = itertools.count()
                    generator = itertools.groupby(reader, lambda x: next(counter) // chunk_size)
                    for chunk_idx, chunk_images in generator:
                        chunk_images = list(chunk_images)
                        original_chunk_path = os.path.join(original_cache_dir, '{}.mp4'.format(chunk_idx))
                        original_chunk_writer.save_as_chunk(chunk_images, original_chunk_path)
                        compressed_chunk_path = os.path.join(compressed_cache_dir, '{}.zip'.format(chunk_idx))
                        compressed_chunk_writer.save_as_chunk(chunk_images, compressed_chunk_path)

                    preview = reader.get_preview()
                    preview.save(os.path.join(db_data_dir, 'preview.jpeg'))
                else:
                    original_chunk_writer = ZipChunkWriter(100)
                    for chunk_idx, chunk_image_ids in enumerate(slice_by_size(range(size), chunk_size)):
                        chunk_images = []
                        for image_id in chunk_image_ids:
                            image_path = get_frame_path(old_task_data_dir, image_id)
                            chunk_images.append((image_path, image_path))
                        original_chunk_path = os.path.join(original_cache_dir, '{}.zip'.format(chunk_idx))
                        original_chunk_writer.save_as_chunk(chunk_images, original_chunk_path)
                        compressed_chunk_path = os.path.join(compressed_cache_dir, '{}.zip'.format(chunk_idx))
                        os.symlink(original_chunk_path, compressed_chunk_path)
                    # the last migrated frame is reused as the task preview
                    shutil.copyfile(get_frame_path(old_task_data_dir, image_id),
                        os.path.join(db_data_dir, 'preview.jpeg'))
            else:
                reader = None
                if os.path.exists(original_images[0]):
                    # task created from images
                    reader = ImageListReader(original_images)
                else:
                    # task created from an archive, zip or pdf
                    archives = []
                    pdfs = []
                    zips = []
                    for p in glob.iglob(os.path.join(db_data_dir, 'raw', '**', '*'), recursive=True):
                        mime_type = get_mime(p)
                        if mime_type == 'archive':
                            archives.append(p)
                        elif mime_type == 'pdf':
                            pdfs.append(p)
                        elif mime_type == 'zip':
                            zips.append(p)
                    if archives:
                        reader = ArchiveReader(archives)
                    elif zips:
                        # fixed: pass the collected zip files, not the (empty) archives list
                        reader = ZipReader(zips)
                    elif pdfs:
                        reader = PdfReader(pdfs)

                if not reader:
                    original_chunk_writer = ZipChunkWriter(100)
                    for chunk_idx, chunk_image_ids in enumerate(slice_by_size(range(size), chunk_size)):
                        chunk_images = []
                        for image_id in chunk_image_ids:
                            image_path = get_frame_path(old_task_data_dir, image_id)
                            chunk_images.append((image_path, image_path))
                        original_chunk_path = os.path.join(original_cache_dir, '{}.zip'.format(chunk_idx))
                        original_chunk_writer.save_as_chunk(chunk_images, original_chunk_path)
                        compressed_chunk_path = os.path.join(compressed_cache_dir, '{}.zip'.format(chunk_idx))
                        os.symlink(original_chunk_path, compressed_chunk_path)
                    # the last migrated frame is reused as the task preview
                    shutil.copyfile(get_frame_path(old_task_data_dir, image_id),
                        os.path.join(db_data_dir, 'preview.jpeg'))
                else:
                    original_chunk_writer = ZipChunkWriter(100)
                    compressed_chunk_writer = ZipCompressedChunkWriter(image_quality)

                    counter = itertools.count()
                    generator = itertools.groupby(reader, lambda x: next(counter) // chunk_size)
                    for chunk_idx, chunk_images in generator:
                        chunk_images = list(chunk_images)
                        compressed_chunk_path = os.path.join(compressed_cache_dir, '{}.zip'.format(chunk_idx))
                        compressed_chunk_writer.save_as_chunk(chunk_images, compressed_chunk_path)
                        original_chunk_path = os.path.join(original_cache_dir, '{}.zip'.format(chunk_idx))
                        original_chunk_writer.save_as_chunk(chunk_images, original_chunk_path)

                    preview = reader.get_preview()
                    preview.save(os.path.join(db_data_dir, 'preview.jpeg'))

            shutil.rmtree(old_db_task_dir)

        return_dict[db_task_id] = (True, '')
    except Exception as e:
        traceback.print_exc(file=sys.stderr)
        return_dict[db_task_id] = (False, str(e))

    return 0
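Both migration branches above chunk the frame stream with the same itertools.count() / itertools.groupby() idiom. A standalone illustration of that pattern on toy data (the helper name is mine, not from the source):

import itertools

def iter_chunks(iterable, chunk_size):
    # group consecutive items by their running index // chunk_size, yielding
    # (chunk_idx, items) pairs without materializing the whole input
    counter = itertools.count()
    for chunk_idx, group in itertools.groupby(iterable, lambda _: next(counter) // chunk_size):
        yield chunk_idx, list(group)

print(list(iter_chunks(range(7), 3)))
# [(0, [0, 1, 2]), (1, [3, 4, 5]), (2, [6])]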
def count_files(file_mapping, counter):
    for rel_path, full_path in file_mapping.items():
        mime = get_mime(full_path)
        counter[mime].append(rel_path)
def _validate_data(data):
    share_root = settings.SHARE_ROOT
    server_files = {
        'dirs': [],
        'files': [],
    }

    for path in data["server_files"]:
        path = os.path.normpath(path).lstrip('/')
        if '..' in path.split(os.path.sep):
            raise ValueError("Don't use '..' inside file paths")
        full_path = os.path.abspath(os.path.join(share_root, path))
        if 'directory' == get_mime(full_path):
            server_files['dirs'].append(path)
        else:
            server_files['files'].append(path)
        if os.path.commonprefix([share_root, full_path]) != share_root:
            raise ValueError("Bad file path: " + path)

    # Remove directories if other files from them exist in server files
    data['server_files'] = server_files['files'] + [
        dir_name for dir_name in server_files['dirs']
        if not [
            f_name for f_name in server_files['files']
            if f_name.startswith(dir_name)
        ]
    ]

    def count_files(file_mapping, counter):
        for rel_path, full_path in file_mapping.items():
            mime = get_mime(full_path)
            counter[mime].append(rel_path)

    counter = {media_type: [] for media_type in MEDIA_TYPES.keys()}

    count_files(
        file_mapping={f: f for f in data['remote_files'] or data['client_files']},
        counter=counter,
    )

    count_files(
        file_mapping={f: os.path.abspath(os.path.join(share_root, f)) for f in data['server_files']},
        counter=counter,
    )

    unique_entries = 0
    multiple_entries = 0
    for media_type, media_config in MEDIA_TYPES.items():
        if counter[media_type]:
            if media_config['unique']:
                unique_entries += len(counter[media_type])
            else:
                multiple_entries += len(counter[media_type])

    if unique_entries == 1 and multiple_entries > 0 or unique_entries > 1:
        unique_types = ', '.join([k for k, v in MEDIA_TYPES.items() if v['unique']])
        multiply_types = ', '.join([k for k, v in MEDIA_TYPES.items() if not v['unique']])
        count = ', '.join(['{} {}(s)'.format(len(v), k) for k, v in counter.items()])
        raise ValueError(
            'Only one {} or many {} can be used simultaneously, '
            'but {} found.'.format(unique_types, multiply_types, count))

    if unique_entries == 0 and multiple_entries == 0:
        raise ValueError('No media data found')

    return counter
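The share-root containment test above relies on os.path.commonprefix, which compares plain strings rather than path components. A small self-contained sketch of the same check in isolation (the helper name is illustrative):

import os

def is_under_share_root(share_root, user_path):
    # normalize and anchor the user-supplied path under the share root, then
    # verify the absolute result still starts with the root
    rel = os.path.normpath(user_path).lstrip('/')
    full = os.path.abspath(os.path.join(share_root, rel))
    return os.path.commonprefix([share_root, full]) == share_root

assert is_under_share_root('/mnt/share', 'dataset/img_0001.png')
assert not is_under_share_root('/mnt/share', '../etc/passwd')

Because commonprefix works on characters, a sibling directory such as /mnt/share2 would also satisfy this particular comparison; the original function limits that risk by rejecting '..' path components before the check, so a join against share_root cannot escape to a sibling.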