def add_exifs(self, entries):
    """Add EXIF attributes to multiple files.

    Args:
        entries: Iterable of (path, sha256, exif) tuples, where exif is any
            dictionary-like object with EXIF attributes.
    """
    # Split the work into chunks
    for chunk in chunks(entries, size=1000):
        with self.database.session_scope() as session:
            index = {(path, sha256): exif for path, sha256, exif in chunk}
            query = session.query(Files).options(joinedload(Files.exif))
            files = query.filter(
                self._by_path_and_hash(list(index.keys()))).all()

            # Update existing files
            for file in files:
                exif = index.pop((file.file_path, file.sha256))
                exif_entity = file.exif or Exif(file=file)
                self._update_exif(exif_entity, exif)
                file.exif = exif_entity

            # Create missing files
            new_files = []
            for (path, sha256), exif in index.items():
                new_file = Files(file_path=path, sha256=sha256)
                exif_entity = Exif(file=new_file)
                self._update_exif(exif_entity, exif)
                new_file.exif = exif_entity
                new_files.append(new_file)
            session.add_all(new_files)

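# The bulk methods in this section rely on a `chunks` helper that is not
# shown here. A minimal sketch of the assumed behavior: split an arbitrary
# iterable into lists of at most `size` items, so that each chunk can be
# written within its own session scope.
from itertools import islice


def chunks(iterable, size=1000):
    """Yield consecutive lists of at most `size` items from `iterable`."""
    iterator = iter(iterable)
    while True:
        chunk = list(islice(iterator, size))
        if not chunk:
            return
        yield chunk
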
def add_metadata(self, entries):
    """Add metadata to multiple files.

    Args:
        entries: Iterable of (path, sha256, metadata) tuples, where metadata
            is any dictionary-like object with metadata attributes.
    """
    # Split the work into chunks
    for chunk in chunks(entries, size=1000):
        with self.database.session_scope() as session:
            index = {(path, sha256): metadata
                     for path, sha256, metadata in chunk}
            query = session.query(Files).options(joinedload(Files.meta))
            files = query.filter(
                self._by_path_and_hash(list(index.keys()))).all()

            # Update existing files
            for file in files:
                metadata = index.pop((file.file_path, file.sha256))
                metadata_entity = file.meta or VideoMetadata(file=file)
                self._update_metadata(metadata_entity, metadata)
                file.meta = metadata_entity

            # Create missing files
            new_files = []
            for (path, sha256), metadata in index.items():
                new_file = Files(file_path=path, sha256=sha256)
                metadata_entity = VideoMetadata(file=new_file)
                self._update_metadata(metadata_entity, metadata)
                new_file.meta = metadata_entity
                new_files.append(new_file)
            session.add_all(new_files)

def add_signatures(self, entries):
    """Bulk add signatures.

    Args:
        entries: Iterable of (path, sha256, url, signature) tuples.
    """
    # Split the work into chunks
    for chunk in chunks(entries, size=1000):
        with self.database.session_scope() as session:
            index = {(path, sha256, url): sig
                     for path, sha256, url, sig in chunk}
            query = session.query(Files).options(
                joinedload(Files.signature))
            # Match existing files by (path, hash) pairs only
            files = query.filter(self._by_path_and_hash(
                [(path, sha256) for path, sha256, _ in index.keys()])).all()

            # Update existing files
            for file in files:
                sig_value = index.pop(
                    (file.file_path, file.sha256, file.file_url))
                sig_entity = file.signature or Signature(file_id=file.id)
                sig_entity.signature = sig_value
                file.signature = sig_entity

            # Create missing files
            new_files = []
            for (path, sha256, url), sig_value in index.items():
                new_files.append(
                    Files(file_path=path, sha256=sha256, file_url=url,
                          signature=Signature(signature=sig_value)))
            session.add_all(new_files)

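# Hypothetical usage of add_signatures, assuming `storage` is an instance of
# the storage class these methods belong to. Note the 4-tuple entries
# (path, sha256, url, signature):
storage.add_signatures([
    ("videos/a.flv", "hash-of-a", "http://example.com/a.flv", b"signature-a"),
    ("videos/b.flv", "hash-of-b", "http://example.com/b.flv", b"signature-b"),
])
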
def test_transform_match():
    source = Files(id=1, file_path="foo")
    target = Files(id=2, file_path="bar")
    match = Matches(query_video_file=source, match_video_file=target,
                    distance=0.5)

    # Check outgoing match
    data = Transform.file_match_dict(match, source.id)
    assert data["distance"] == match.distance
    assert data["file"]["id"] == target.id
    assert data["file"]["file_path"] == target.file_path

    # Check incoming match
    data = Transform.file_match_dict(match, target.id)
    assert data["distance"] == match.distance
    assert data["file"]["id"] == source.id
    assert data["file"]["file_path"] == source.file_path

def make_file(prefix="", length=42, ext="flv", audio=True,
              date=datetime.date(2000, 1, 1), scenes=((0, 1), (1, 2))):
    """Create a unique file."""
    path = f"{prefix}some/path/{uuid()}.{ext}"
    sha256 = f"hash-of-{path}"
    return Files(file_path=path, sha256=sha256,
                 exif=Exif(General_FileExtension=ext,
                           Audio_Duration=float(audio),
                           General_Encoded_Date=date,
                           General_Duration=length),
                 meta=VideoMetadata(),
                 scenes=[Scene(start_time=start, duration=duration)
                         for start, duration in scenes])

def add_file_signature(self, path, sha256, sig_value):
    """Add video file signature."""
    with self.database.session_scope() as session:
        query = session.query(Files).options(joinedload(Files.signature))
        file = query.filter(Files.file_path == path,
                            Files.sha256 == sha256).one_or_none()
        file = file or Files(file_path=path, sha256=sha256)
        sig_entity = file.signature or Signature(file_id=file.id)
        sig_entity.signature = sig_value
        file.signature = sig_entity
        session.add(file)

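# Hypothetical usage of add_file_signature for a single file, again assuming
# a `storage` instance of the surrounding class:
storage.add_file_signature(path="videos/a.flv", sha256="hash-of-a",
                           sig_value=b"signature-a")
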
def test_transform_partial():
    file = Files(file_path="foo", sha256="bar",
                 meta=None, exif=None, signature=None)
    data = Transform.file_dict(file, meta=True, signature=True, exif=True)
    assert data.get("meta") is None
    assert data.get("exif") is None
    assert data.get("signature") is None

def test_transform_file():
    file = Files(file_path="foo", exif=Exif(General_FileSize=42.0))

    # Exclude exif
    data = Transform.file_dict(file)
    assert data["file_path"] == file.file_path
    assert "exif" not in data

    # Include exif
    data = Transform.file_dict(file, exif=True)
    assert data["file_path"] == file.file_path
    assert data["exif"]["General_FileSize"] == file.exif.General_FileSize

def add_scenes(self, entries, override=False):
    """Bulk add scenes.

    Args:
        entries: Iterable of (path, sha256, durations) tuples, where
            durations is an iterable of scene durations in seconds.
        override: Delete existing scenes if any.
    """
    # Split the work into chunks
    for chunk in chunks(entries, size=1000):
        with self.database.session_scope() as session:
            index = {(path, sha256): durations
                     for path, sha256, durations in chunk}
            query = session.query(Files).options(joinedload(Files.scenes))
            files = query.filter(
                self._by_path_and_hash(list(index.keys()))).all()

            # Delete existing scenes if needed
            if override:
                self._delete_file_scenes(session, *files)

            # Update existing files
            for file in files:
                durations = index.pop((file.file_path, file.sha256))
                # Skip write operation if scenes already exist
                if len(file.scenes) > 0:
                    continue
                # Otherwise write scenes
                file.scenes = self._create_scenes(file, durations)

            # Create missing files
            new_files = []
            for (path, sha256), durations in index.items():
                new_file = Files(file_path=path, sha256=sha256)
                new_file.scenes = self._create_scenes(new_file, durations)
                new_files.append(new_file)
            session.add_all(new_files)

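# `add_scenes` and `add_file_scenes` call a `_create_scenes` helper that is
# not shown in this section. A plausible sketch, assuming each scene starts
# where the previous one ended (start times are running sums of durations)
# and that Scene accepts a `file` relationship keyword:
@staticmethod
def _create_scenes(file, durations):
    """Build Scene entities for the given file from scene durations."""
    scenes = []
    start_time = 0
    for duration in durations:
        scenes.append(Scene(file=file, start_time=start_time,
                            duration=duration))
        start_time += duration
    return scenes
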
def make_file(prefix="", length=42, ext="flv", scenes=((0, 1), (1, 2))):
    """Create a unique file."""
    path = f"{prefix}some/path/{uuid()}.{ext}"
    sha256 = f"hash-of-{path}"
    return Files(file_path=path, sha256=sha256,
                 exif=Exif(General_FileExtension=ext,
                           General_Duration=length * 1000),
                 meta=VideoMetadata(),
                 scenes=[Scene(start_time=start, duration=duration)
                         for start, duration in scenes])

def add_file_exif(self, path, sha256, exif):
    """Add EXIF attributes for a single file.

    Args:
        path (String): Source video file path.
        sha256 (String): Source video file hash.
        exif: Dictionary object containing EXIF attributes.
    """
    with self.database.session_scope() as session:
        query = session.query(Files).options(joinedload(Files.exif))
        file = query.filter(Files.file_path == path,
                            Files.sha256 == sha256).one_or_none()
        file = file or Files(file_path=path, sha256=sha256)
        exif_entity = file.exif or Exif(file=file)
        self._update_exif(exif_entity, exif)
        file.exif = exif_entity
        session.add(exif_entity)

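# `_update_exif` (and the analogous `_update_metadata`) is not shown in this
# section. A minimal sketch, assuming it copies every entry of the
# dictionary-like argument that matches an attribute of the entity class:
@staticmethod
def _update_exif(exif_entity, exif):
    """Copy recognized EXIF attributes onto the Exif entity."""
    for name, value in exif.items():
        if hasattr(type(exif_entity), name):
            setattr(exif_entity, name, value)
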
def add_file_metadata(self, path, sha256, metadata):
    """Add metadata for a single file.

    Args:
        path (String): Source video file path.
        sha256 (String): Source video file hash.
        metadata: Dictionary object containing metadata attributes.
    """
    with self.database.session_scope() as session:
        query = session.query(Files).options(joinedload(Files.meta))
        file = query.filter(Files.file_path == path,
                            Files.sha256 == sha256).one_or_none()
        file = file or Files(file_path=path, sha256=sha256)
        metadata_entity = file.meta or VideoMetadata(file=file)
        self._update_metadata(metadata_entity, metadata)
        file.meta = metadata_entity
        session.add(metadata_entity)

def add_file_scenes(self, path, sha256, durations, override=False):
    """Add scenes for a single video file."""
    with self.database.session_scope() as session:
        query = session.query(Files).options(joinedload(Files.scenes))
        file = query.filter(Files.file_path == path,
                            Files.sha256 == sha256).one_or_none()
        file = file or Files(file_path=path, sha256=sha256)

        # Delete existing scenes if needed
        if override:
            self._delete_file_scenes(session, file)

        # Skip write operation if scenes already exist
        if len(file.scenes) > 0:
            return

        # Write new scenes
        file.scenes = self._create_scenes(file, durations)
        session.add_all(file.scenes)

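# `_delete_file_scenes` is not shown in this section. A rough sketch,
# assuming scenes are deleted through the session and the in-memory
# collections are cleared so the `len(file.scenes) > 0` checks above
# observe the deletion within the same session:
@staticmethod
def _delete_file_scenes(session, *files):
    """Delete existing scenes of the given files."""
    for file in files:
        for scene in file.scenes:
            session.delete(scene)
        file.scenes = []
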
@staticmethod
def _files_for_matches(session, connections):
    """Get or create files for connections of the form
    (path_1, sha256_1, path_2, sha256_2).
    """
    file_identifiers = set(
        DBResultStorage._matches_file_identifiers(connections))
    existing_files = (session.query(Files).filter(
        DBResultStorage._by_path_and_hash(file_identifiers)).all())

    # Remove existing files' (path, hash) ids, leaving only missing ones
    for file in existing_files:
        file_identifiers.remove((file.file_path, file.sha256))

    # Create missing files
    new_files = []
    for path, sha256 in file_identifiers:
        new_file = Files(file_path=path, sha256=sha256)
        new_files.append(new_file)
    session.add_all(new_files)
    return existing_files + new_files

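# The file lookups in this section use a `_by_path_and_hash` helper that is
# not shown here. A plausible sketch using SQLAlchemy's tuple_ IN construct
# (support for composite IN varies by database backend):
from sqlalchemy import tuple_


@staticmethod
def _by_path_and_hash(path_hash_pairs):
    """Build a filter clause matching Files by (file_path, sha256) pairs."""
    return tuple_(Files.file_path, Files.sha256).in_(list(path_hash_pairs))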