Beispiel #1
0
def test_change_filename_extension(app):
    """Check extension replacement for a valid and an invalid filename."""
    # A dotted name gets its old extension folded into the base name.
    assert 'test-pdf.txt' == change_filename_extension('test.pdf', 'txt')

    # The bare name 'test' must be rejected with an explicit message.
    with pytest.raises(Exception) as raised:
        change_filename_extension('test', 'txt')
    assert 'test is not a valid filename' == str(raised.value)
Beispiel #2
0
    def create_thumbnail(self, file):
        """Create a thumbnail for the given file and store it in the record.

        The image is produced by ``create_thumbnail_from_file`` from the
        file's URI and MIME type. It is stored in the record's files under
        the key returned by ``change_filename_extension(file['key'], 'jpg')``
        and flagged with the type ``thumbnail``.

        Generation is best effort: any error is logged as a warning and is
        not propagated to the caller.

        :param file: File from which thumbnail is created.
        """
        try:
            # Create thumbnail
            image_blob = create_thumbnail_from_file(file.file.uri,
                                                    file.mimetype)

            thumbnail_key = change_filename_extension(file['key'], 'jpg')

            # Store thumbnail in record's files
            self.files[thumbnail_key] = BytesIO(image_blob)
            self.files[thumbnail_key]['type'] = 'thumbnail'
        except Exception as exception:
            # Both lookups go through `.get()`: the fallback used to be
            # `self['pid']`, which was evaluated eagerly and could raise a
            # KeyError from inside this handler when the record had no pid.
            record_id = self.get('identifiedBy', self.get('pid'))
            current_app.logger.warning(
                'Error during thumbnail generation of {file} of record '
                '{record}: {error}'.format(file=file['key'],
                                           error=exception,
                                           record=record_id))
Beispiel #3
0
    def create_fulltext_file(self, file):
        """Create the fulltext file corresponding to the given file object.

        Nothing is done when the ``SONAR_DOCUMENTS_EXTRACT_FULLTEXT_ON_IMPORT``
        configuration is falsy or when the file is not a PDF. Otherwise the
        extracted text is stored in the record's files under the key returned
        by ``change_filename_extension(file.key, 'txt')`` and flagged with
        the type ``fulltext``.

        Extraction is best effort: any error is logged as a warning and is
        not propagated to the caller.

        :param file: File object.
        """
        # If extract fulltext is disabled or file is not a PDF
        extraction_enabled = current_app.config.get(
            'SONAR_DOCUMENTS_EXTRACT_FULLTEXT_ON_IMPORT')
        if not extraction_enabled or file.mimetype != 'application/pdf':
            return

        # Try to extract full text from file data, and generate a warning if
        # it's not possible. For several cases, file is locked against fulltext
        # copy.
        try:
            with file.file.storage().open() as pdf_file:
                fulltext = extract_text_from_content(pdf_file.read())

            key = change_filename_extension(file.key, 'txt')
            self.files[key] = BytesIO(fulltext.encode())
            self.files[key]['type'] = 'fulltext'
        except Exception as exception:
            # Use `.get()` so that a record without `identifiedBy` cannot
            # raise a KeyError from inside this handler; fall back to the
            # pid when available.
            record_id = self.get('identifiedBy', self.get('pid'))
            current_app.logger.warning(
                'Error during fulltext extraction of {file} of record '
                '{record}: {error}'.format(file=file.key,
                                           error=exception,
                                           record=record_id))
Beispiel #4
0
    def add_file(self, data, key, **kwargs):
        """Create file and add it to record.

        Nothing is done when the ``SONAR_DOCUMENTS_IMPORT_FILES``
        configuration is falsy, or when a file with the same key and the same
        MD5 checksum is already registered.

        Depending on configuration, a thumbnail and (for PDF files) a
        fulltext file are created next to the added file. Fulltext extraction
        is best effort: errors are logged as warnings and not propagated.

        :param data: Binary data of file
        :param str key: File key

        kwargs may contain some additional data such as: ``label`` (defaults
        to the key), ``type`` (defaults to ``'file'``), ``order`` (defaults
        to ``1``), ``restricted``, ``embargo_date`` and ``url``.
        """
        if not current_app.config.get('SONAR_DOCUMENTS_IMPORT_FILES'):
            return

        # If file with the same key exists and checksum is the same as the
        # registered file, we don't do anything
        checksum = compute_md5_checksum(BytesIO(data))
        if key in self.files and checksum == self.files[key].file.checksum:
            return

        # Create the file
        self.files[key] = BytesIO(data)
        self.files[key]['label'] = kwargs.get('label', key)
        self.files[key]['type'] = kwargs.get('type', 'file')
        self.files[key]['order'] = kwargs.get('order', 1)

        # Embargo
        if kwargs.get('restricted'):
            self.files[key]['restricted'] = kwargs['restricted']

        if kwargs.get('embargo_date'):
            self.files[key]['embargo_date'] = kwargs['embargo_date']

        # Store external file URL
        if kwargs.get('url'):
            self.files[key]['external_url'] = kwargs['url']

        # Create thumbnail
        if current_app.config.get('SONAR_DOCUMENTS_GENERATE_THUMBNAIL'):
            self.create_thumbnail(self.files[key])

        # Try to extract full text from file data, and generate a warning if
        # it's not possible. For several cases, file is locked against fulltext
        # copy.
        if current_app.config.get(
                'SONAR_DOCUMENTS_EXTRACT_FULLTEXT_ON_IMPORT'
        ) and self.files[key].mimetype == 'application/pdf':
            try:
                fulltext = extract_text_from_content(data)

                # Keep `key` untouched so that the warning below always
                # reports the source file, not the derived fulltext key.
                fulltext_key = change_filename_extension(key, 'txt')
                self.files[fulltext_key] = BytesIO(fulltext.encode())
                self.files[fulltext_key]['type'] = 'fulltext'
            except Exception as exception:
                # Use `.get()` so that a record without `identifiedBy`
                # cannot raise a KeyError from inside this handler.
                record_id = self.get('identifiedBy', self.get('pid'))
                current_app.logger.warning(
                    'Error during fulltext extraction of {file} of record '
                    '{record}: {error}'.format(file=key,
                                               error=exception,
                                               record=record_id))
Beispiel #5
0
def thumbnail(file, files):
    """Return the thumbnail entry associated with a file.

    The thumbnail key is obtained by passing the file's key and ``'jpg'`` to
    ``change_filename_extension``; the first entry of ``files`` having that
    key is returned.

    :param file: Dict of file from which thumbnail will be returned.
    :param files: List of files of the record.
    :returns: The matching file entry, or None if there is none.
    """
    wanted_key = change_filename_extension(file['key'], 'jpg')

    # Scan every entry, then keep the first hit only.
    candidates = list(filter(lambda item: item['key'] == wanted_key, files))

    return candidates[0] if candidates else None
Beispiel #6
0
def get_thumbnail(file, record):
    """Return the URL of the image representing a file.

    A restricted file yields the restricted placeholder image. Otherwise the
    record's ``_files`` are searched for an entry whose key is the file's key
    converted by ``change_filename_extension`` with ``'jpg'``; when none
    exists, the default placeholder image is returned.

    :param file: Dict of file from which thumbnail will be returned.
    :param record: Record object.
    :returns: URL to thumbnail file.
    """
    # Restricted files never expose their real thumbnail.
    if file['restriction']['restricted']:
        return 'static/images/restricted.png'

    thumbnail_key = change_filename_extension(file['key'], 'jpg')

    found = [
        entry for entry in record['_files'] if entry['key'] == thumbnail_key
    ]

    if found:
        return '/documents/{pid}/files/{key}'.format(pid=record['pid'],
                                                     key=thumbnail_key)

    return 'static/images/no-image.png'