def add_exifs(self, entries):
        """Add EXIF metadata to multiple files.

        Args:
            entries: Iterable of (path, sha256, exif) tuples, where exif is
                any dictionary-like object with exif attributes.
        """
        # Process the entries in fixed-size batches, one session per batch.
        for batch in chunks(entries, size=1000):
            with self.database.session_scope() as session:
                remaining = {(path, sha256): exif
                             for path, sha256, exif in batch}
                known_files = (session.query(Files)
                               .options(joinedload(Files.exif))
                               .filter(self._by_path_and_hash(list(remaining)))
                               .all())

                # Update files that already have a database record.
                for file in known_files:
                    raw_exif = remaining.pop((file.file_path, file.sha256))
                    entity = file.exif or Exif(file=file)
                    self._update_exif(entity, raw_exif)
                    file.exif = entity

                # Whatever is left in the index has no database record yet.
                created = []
                for (path, sha256), raw_exif in remaining.items():
                    new_file = Files(file_path=path, sha256=sha256)
                    entity = Exif(file=new_file)
                    self._update_exif(entity, raw_exif)
                    new_file.exif = entity
                    created.append(new_file)
                session.add_all(created)
    def add_metadata(self, entries):
        """Add metadata to multiple files.

        Args:
            entries: Iterable of (path, sha256, metadata) tuples, where
                metadata is any dictionary-like with metadata attributes.
        """
        # Process the entries in fixed-size batches, one session per batch.
        for batch in chunks(entries, size=1000):
            with self.database.session_scope() as session:
                remaining = {(path, sha256): meta
                             for path, sha256, meta in batch}
                known_files = (session.query(Files)
                               .options(joinedload(Files.meta))
                               .filter(self._by_path_and_hash(list(remaining)))
                               .all())

                # Update files that already have a database record.
                for file in known_files:
                    raw_meta = remaining.pop((file.file_path, file.sha256))
                    entity = file.meta or VideoMetadata(file=file)
                    self._update_metadata(entity, raw_meta)
                    file.meta = entity

                # Whatever is left in the index has no database record yet.
                created = []
                for (path, sha256), raw_meta in remaining.items():
                    new_file = Files(file_path=path, sha256=sha256)
                    entity = VideoMetadata(file=new_file)
                    self._update_metadata(entity, raw_meta)
                    new_file.meta = entity
                    created.append(new_file)
                session.add_all(created)
    def add_signatures(self, entries):
        """Bulk add video file signatures.

        Args:
            entries: Iterable of (path, sha256, url, signature) tuples.
        """
        # Split the work into chunks
        for chunk in chunks(entries, size=1000):
            with self.database.session_scope() as session:
                # Keyed by (path, sha256, url) — unlike the other bulk
                # methods, which key by (path, sha256) only.
                index = {(path, sha256, url): sig
                         for path, sha256, url, sig in chunk}
                query = (session.query(Files).options(
                    joinedload(Files.signature)))

                # NOTE(review): 3-tuples are passed here, while other callers
                # of _by_path_and_hash pass (path, sha256) pairs — confirm
                # the helper tolerates/ignores the extra url element.
                files = query.filter(self._by_path_and_hash(list(
                    index.keys()))).all()

                # Update existing files
                for file in files:
                    # NOTE(review): pop() raises KeyError if the stored
                    # file_url differs from the url supplied in the entry —
                    # confirm this invariant holds for all callers.
                    sig_value = index.pop(
                        (file.file_path, file.sha256, file.file_url))
                    sig_entity = file.signature or Signature(file_id=file.id)
                    sig_entity.signature = sig_value
                    file.signature = sig_entity

                # Create missing files (entries left in the index)
                new_files = []
                for (path, sha256, url), sig_value in index.items():
                    new_files.append(
                        Files(file_path=path,
                              sha256=sha256,
                              file_url=url,
                              signature=Signature(signature=sig_value)))

                session.add_all(new_files)
def test_transform_match():
    """file_match_dict reports the *other* side of a match for a given file."""
    query_file = Files(id=1, file_path="foo")
    matched_file = Files(id=2, file_path="bar")
    match = Matches(query_video_file=query_file,
                    match_video_file=matched_file,
                    distance=0.5)

    def check(data, expected_file):
        assert data["distance"] == match.distance
        assert data["file"]["id"] == expected_file.id
        assert data["file"]["file_path"] == expected_file.file_path

    # Outgoing match: seen from the query file, reports the matched file.
    check(Transform.file_match_dict(match, query_file.id), matched_file)

    # Incoming match: seen from the matched file, reports the query file.
    check(Transform.file_match_dict(match, matched_file.id), query_file)
# Example #5
# 0
def make_file(prefix="", length=42, ext="flv", audio=True, date=datetime.date(2000, 1, 1),
              scenes=((0, 1), (1, 2))):
    """Create a unique file entity with exif, metadata, and scene fixtures."""
    file_path = f"{prefix}some/path/{uuid()}.{ext}"
    exif = Exif(
        General_FileExtension=ext,
        Audio_Duration=float(audio),
        General_Encoded_Date=date,
        General_Duration=length,
    )
    scene_entities = [Scene(start_time=begin, duration=dur) for begin, dur in scenes]
    return Files(
        file_path=file_path,
        sha256=f"hash-of-{file_path}",
        exif=exif,
        meta=VideoMetadata(),
        scenes=scene_entities,
    )
 def add_file_signature(self, path, sha256, sig_value):
     """Store the signature of a single video file.

     Creates the file record first if it does not exist yet.
     """
     with self.database.session_scope() as session:
         file = (session.query(Files)
                 .options(joinedload(Files.signature))
                 .filter(Files.file_path == path, Files.sha256 == sha256)
                 .one_or_none())
         if file is None:
             file = Files(file_path=path, sha256=sha256)
         signature = file.signature or Signature(file_id=file.id)
         signature.signature = sig_value
         file.signature = signature
         session.add(file)
def test_transform_partial():
    """Requested-but-absent related entities are rendered as None."""
    file = Files(file_path="foo",
                 sha256="bar",
                 meta=None,
                 exif=None,
                 signature=None)

    data = Transform.file_dict(file, meta=True, signature=True, exif=True)

    for key in ("meta", "exif", "signature"):
        assert data.get(key) is None
def test_transform_file():
    """file_dict includes exif data only when explicitly requested."""
    file = Files(file_path="foo", exif=Exif(General_FileSize=42.0))

    # Exif is excluded by default.
    without_exif = Transform.file_dict(file)
    assert without_exif["file_path"] == file.file_path
    assert "exif" not in without_exif

    # Exif is included on request.
    with_exif = Transform.file_dict(file, exif=True)
    assert with_exif["file_path"] == file.file_path
    assert with_exif["exif"]["General_FileSize"] == file.exif.General_FileSize
    def add_scenes(self, entries, override=False):
        """Bulk add scenes.

        Args:
            entries: Iterable of (path, sha256, durations) tuples, where
                durations is an iterable of scene durations in seconds.
            override: Delete existing scenes if any.
        """
        # Process the entries in fixed-size batches, one session per batch.
        for batch in chunks(entries, size=1000):
            with self.database.session_scope() as session:
                remaining = {(path, sha256): durations
                             for path, sha256, durations in batch}
                known_files = (session.query(Files)
                               .options(joinedload(Files.scenes))
                               .filter(self._by_path_and_hash(list(remaining)))
                               .all())

                # Drop previously stored scenes when asked to.
                if override:
                    self._delete_file_scenes(session, *known_files)

                # Update files that already have a database record.
                for file in known_files:
                    durations = remaining.pop((file.file_path, file.sha256))

                    # Leave files that already have scenes untouched.
                    if file.scenes:
                        continue

                    file.scenes = self._create_scenes(file, durations)

                # Whatever is left in the index has no database record yet.
                created = []
                for (path, sha256), durations in remaining.items():
                    new_file = Files(file_path=path, sha256=sha256)
                    new_file.scenes = self._create_scenes(new_file, durations)
                    created.append(new_file)
                session.add_all(created)
def make_file(prefix="", length=42, ext="flv", scenes=((0, 1), (1, 2))):
    """Create a unique file entity with exif and scene fixtures."""
    file_path = f"{prefix}some/path/{uuid()}.{ext}"
    return Files(
        file_path=file_path,
        sha256=f"hash-of-{file_path}",
        # Duration is stored in milliseconds while `length` is in seconds.
        exif=Exif(General_FileExtension=ext, General_Duration=length * 1000),
        meta=VideoMetadata(),
        scenes=[Scene(start_time=begin, duration=dur) for begin, dur in scenes],
    )
    def add_file_exif(self, path, sha256, exif):
        """Add a single file's EXIF attributes.

        Args:
            path (String): Source video file path.
            sha256 (String): Source video file hash.
            exif: Dictionary object containing EXIF attributes.
        """
        with self.database.session_scope() as session:
            file = (session.query(Files)
                    .options(joinedload(Files.exif))
                    .filter(Files.file_path == path,
                            Files.sha256 == sha256)
                    .one_or_none())
            if file is None:
                file = Files(file_path=path, sha256=sha256)

            entity = file.exif or Exif(file=file)
            self._update_exif(entity, exif)
            file.exif = entity
            session.add(entity)
    def add_file_metadata(self, path, sha256, metadata):
        """Add a single file's metadata.

        Args:
            path (String): Source video file path.
            sha256 (String): Source video file hash.
            metadata: Dictionary object containing metadata attributes.
        """
        with self.database.session_scope() as session:
            file = (session.query(Files)
                    .options(joinedload(Files.meta))
                    .filter(Files.file_path == path,
                            Files.sha256 == sha256)
                    .one_or_none())
            if file is None:
                file = Files(file_path=path, sha256=sha256)

            entity = file.meta or VideoMetadata(file=file)
            self._update_metadata(entity, metadata)
            file.meta = entity
            session.add(entity)
    def add_file_scenes(self, path, sha256, durations, override=False):
        """Add scenes for a single video file."""
        with self.database.session_scope() as session:
            file = (session.query(Files)
                    .options(joinedload(Files.scenes))
                    .filter(Files.file_path == path,
                            Files.sha256 == sha256)
                    .one_or_none())
            if file is None:
                file = Files(file_path=path, sha256=sha256)

            # Drop previously stored scenes when asked to.
            if override:
                self._delete_file_scenes(session, file)

            # Leave files that already have scenes untouched.
            if file.scenes:
                return

            file.scenes = self._create_scenes(file, durations)
            session.add_all(file.scenes)
    def _files_for_matches(session, connections):
        """Get or create Files for connections of the form
        (path_1, sha256_1, path_2, sha256_2).
        """
        wanted = set(
            DBResultStorage._matches_file_identifiers(connections))
        existing = (session.query(Files)
                    .filter(DBResultStorage._by_path_and_hash(wanted))
                    .all())

        # Whatever remains in `wanted` after removing the found
        # identifiers corresponds to files that are missing from the DB.
        for file in existing:
            wanted.remove((file.file_path, file.sha256))

        # Create the missing files in one pass.
        created = [Files(file_path=path, sha256=sha256)
                   for path, sha256 in wanted]
        session.add_all(created)

        return existing + created