Beispiel #1
0
def entry_from_file_path(file_path: Path, source: BaseSource) -> Entry:
    """
    Creates an Entry template from a file path, filling the fields with file metadata.

    The Entry is built and returned; nothing in this function saves it.

    :param file_path: Path of the file to describe. Its name becomes the
        entry title and its resolved path is stored in
        ``extra_attributes['file']['path']``.
    :param source: Source whose ``entry_source`` is assigned to the entry.
    :return: An Entry whose schema and extra attributes depend on the file's
        mimetype (image, video, audio or text). Files with any other
        mimetype, or no detected mimetype, keep the schema returned by
        ``get_schema_from_mimetype``.
    """
    mimetype = get_mimetype(file_path)
    entry = Entry(
        title=file_path.name,
        source=source.entry_source,
        schema=get_schema_from_mimetype(mimetype),
        extra_attributes={
            'file': {
                'checksum': get_checksum(file_path),
                'path': str(file_path.resolve()),
                'mimetype': mimetype,
            },
        },
    )
    # NOTE(review): the date is derived before the type-specific metadata
    # below is merged into the entry. If get_file_entry_date reads that
    # metadata, this call should move to the end of the function - confirm.
    entry.date_on_timeline = get_file_entry_date(entry)

    if not mimetype:
        return entry

    # A mimetype can only match one of these prefixes, so the branches are
    # mutually exclusive.
    if mimetype.startswith('image/'):
        entry.schema = 'file.image'
        entry.extra_attributes.update(get_image_extra_attributes(file_path))
    elif mimetype.startswith('video/'):
        entry.schema = 'file.video'
        try:
            entry.extra_attributes.update(
                get_video_extra_attributes(file_path))
        except FileFormatError:
            # Best effort: keep the entry, just without video metadata.
            logger.exception(
                f"Could not read metadata for video {file_path}")
    elif mimetype.startswith('audio/'):
        entry.schema = 'file.audio'
        entry.extra_attributes.update(get_audio_extra_attributes(file_path))
    elif mimetype.startswith('text/'):
        entry.schema = 'file.text'
        try:
            with file_path.open('r') as text_file:
                # Only the first MAX_PLAINTEXT_PREVIEW_SIZE characters are
                # kept as the description.
                entry.description = text_file.read(
                    settings.MAX_PLAINTEXT_PREVIEW_SIZE)
        except UnicodeDecodeError:
            # A file with a text/ mimetype is not guaranteed to decode with
            # the default encoding. Like the video branch, keep the entry
            # and skip the preview instead of failing.
            logger.exception(
                f"Could not read text preview for {file_path}")

    return entry
Beispiel #2
0
            elif mimetype and mimetype.startswith('video'):
                entry.schema = 'message.telegram.video'
            elif mimetype and mimetype.startswith('image'):
                entry.schema = 'message.telegram.image'
        else:
            entry = Entry()
            if message.get('media_type') == 'sticker':
                entry.schema = 'message.telegram.sticker'
            elif message.get('media_type') == 'animation':
                entry.schema = 'message.telegram.gif'
            else:
                entry.schema = 'message.telegram'

        entry.source = self.entry_source
        entry.description = self.get_message_text(message)
        entry.date_on_timeline = self.get_message_date(message)

        # Set message metadata
        if chat['type'] == 'personal_chat':
            # For personal chats, messages are from one user to another user.
            # In the telegram data, the chat ID is the same as the other user's ID.
            if message['from_id'] == self.account_id(
                    account):  # Outgoing private msg
                entry.extra_attributes.update({
                    'sender_name':
                    self.account_name(account),
                    'sender_id':
                    message['from_id'],
                    'recipient_name':
                    chat['name'],
                    'recipient_id':
Beispiel #3
0
def create_entries_from_directory(path: Path,
                                  source: BaseSource,
                                  backup_date: datetime,
                                  use_cache: bool = True) -> List[Entry]:
    """
    Delete and recreate the Entries for the files in a directory.

    A new Entry is built for every file under ``path`` that matches the
    include rules read from ``settings.TIMELINE_INCLUDE_FILE``. All existing
    entries of ``source`` are then deleted and the new ones are bulk-created.

    :param path: Directory whose files are turned into entries.
    :param source: Source that provides the existing entries
        (``get_entries()``) and the ``entry_source`` of the new ones.
    :param backup_date: Stored, converted with ``datetime_to_json``, under
        ``extra_attributes['backup_date']`` of every new entry.
    :param use_cache: When true, checksums and selected metadata of the
        source's existing entries are reused for unchanged files instead of
        being recomputed.
    :return: The result of ``Entry.objects.bulk_create`` for the new entries.
    :raises OSError: If a file cannot be stat'ed or checksummed. The error is
        logged and re-raised.
    """
    include_rules = list(
        get_include_rules_for_dir(path, settings.TIMELINE_INCLUDE_FILE))
    files = list(
        get_files_matching_rules(get_files_in_dir(path), include_rules))

    # Translates (device, inode) pairs to checksums. Inode numbers are only
    # unique within one device, so the device id is part of the key.
    file_id_checksum_cache = {}
    metadata_cache = {}  # translates checksums to entry metadata
    cached_extra_attributes = ('location', 'media', 'previews')
    if use_cache:
        # Most files in a directory already have a matching Entry.
        # Recalculating the metadata for each file Entry is wasteful and
        # time-consuming. Instead, we build a cache from the existing
        # entries: a file that is the same on-disk file as an existing
        # Entry's file reuses that Entry's stored checksum, and any file
        # whose checksum is known reuses the cached metadata.
        # The stored checksum is trusted as-is on an identity match; it is
        # not recomputed and compared.
        for entry in source.get_entries():
            file_attributes = entry.extra_attributes['file']
            entry_checksum = file_attributes['checksum']
            try:
                entry_file_stat = Path(file_attributes['path']).stat()
            except FileNotFoundError:
                # This can happen if the file in the Entry was deleted or
                # moved. Its metadata is still cached by checksum below.
                pass
            else:
                entry_file_id = (entry_file_stat.st_dev,
                                 entry_file_stat.st_ino)
                file_id_checksum_cache[entry_file_id] = entry_checksum

            metadata = {
                attribute: entry.extra_attributes[attribute]
                for attribute in cached_extra_attributes
                if attribute in entry.extra_attributes
            }
            if entry.description:
                metadata['description'] = entry.description

            metadata_cache[entry_checksum] = metadata

    entries_to_create = []
    for file in files:
        # Path.resolve() returns a new path rather than modifying the
        # original, so the result has to be kept to be of any use.
        resolved_path = file.resolve()

        try:
            file_stat = file.stat()
            file_id = (file_stat.st_dev, file_stat.st_ino)
            checksum = (file_id_checksum_cache.get(file_id)
                        or get_checksum(file))
        except OSError:
            logger.exception(f"Could not generate checksum for {str(file)}")
            raise

        cached_metadata = metadata_cache.get(checksum)
        if cached_metadata is not None:
            mimetype = get_mimetype(file)
            entry = Entry(title=file.name,
                          source=source.entry_source,
                          schema=get_schema_from_mimetype(mimetype),
                          description=cached_metadata.get('description', ''),
                          extra_attributes={
                              'file': {
                                  # Resolved, like the path stored by
                                  # entry_from_file_path.
                                  'path': str(resolved_path),
                                  'checksum': checksum,
                                  'mimetype': mimetype,
                              },
                          })

            for attribute in cached_extra_attributes:
                if attribute in cached_metadata:
                    entry.extra_attributes[attribute] = cached_metadata[
                        attribute]
        else:
            entry = entry_from_file_path(file, source)

        entry.extra_attributes['backup_date'] = datetime_to_json(backup_date)

        # This could change, so it's not cached
        entry.date_on_timeline = get_file_entry_date(entry)
        entries_to_create.append(entry)

    # TODO: Only delete the entries in the specified directory?
    source.get_entries().delete()
    return Entry.objects.bulk_create(entries_to_create)