Ejemplo n.º 1
0
def get_mimetype(file_object, mimetype_only=False):
    """
    Determine a file's mimetype by calling the system's libmagic
    library via python-magic.
    """
    file_mimetype = None
    file_mime_encoding = None

    temporary_file_object = NamedTemporaryFile()
    file_object.seek(0)
    copyfileobj(fsrc=file_object, fdst=temporary_file_object)
    file_object.seek(0)
    temporary_file_object.seek(0)

    kwargs = {'mime': True}

    if not mimetype_only:
        kwargs['mime_encoding'] = True

    try:
        mime = magic.Magic(**kwargs)

        if mimetype_only:
            file_mimetype = mime.from_file(filename=temporary_file_object.name)
        else:
            file_mimetype, file_mime_encoding = mime.from_file(
                filename=temporary_file_object.name
            ).split('; charset=')
    finally:
        temporary_file_object.close()

    return file_mimetype, file_mime_encoding
Ejemplo n.º 2
0
    def verify_file(self,
                    file_object,
                    signature_file=None,
                    all_keys=False,
                    key_fingerprint=None,
                    key_id=None):
        keys = self._preload_keys(all_keys=all_keys,
                                  key_fingerprint=key_fingerprint,
                                  key_id=key_id)

        if signature_file:
            # Save the original data and invert the argument order
            # Signature first, file second
            temporary_file_object = NamedTemporaryFile()
            temporary_filename = temporary_file_object.name
            shutil.copyfileobj(fsrc=file_object, fdst=temporary_file_object)
            temporary_file_object.seek(0)

            signature_file_buffer = io.BytesIO()
            signature_file_buffer.write(signature_file.read())
            signature_file_buffer.seek(0)
            signature_file.seek(0)
            verify_result = gpg_backend.verify_file(
                file_object=signature_file_buffer,
                data_filename=temporary_filename,
                keys=keys)
            signature_file_buffer.close()
            temporary_file_object.close()
        else:
            verify_result = gpg_backend.verify_file(file_object=file_object,
                                                    keys=keys)

        logger.debug('verify_result.status: %s', verify_result.status)

        if verify_result:
            # Signed and key present
            logger.debug(msg='signed and key present')
            return SignatureVerification(verify_result.__dict__)
        elif verify_result.status == 'no public key' and not (
                key_fingerprint or all_keys or key_id):
            # Signed but key not present, retry with key fetch
            logger.debug(msg='no public key')
            file_object.seek(0)
            return self.verify_file(file_object=file_object,
                                    signature_file=signature_file,
                                    key_id=verify_result.key_id)
        elif verify_result.key_id:
            # Signed, retried and key still not found
            logger.debug(msg='signed, retried and key still not found')
            return SignatureVerification(verify_result.__dict__)
        else:
            logger.debug(msg='file not signed')
            raise VerificationError('File not signed')
Ejemplo n.º 3
0
    def convert(self, *args, **kwargs):
        super(Python, self).convert(*args, **kwargs)

        if self.mime_type == 'application/pdf' and pdftoppm:
            new_file_object = NamedTemporaryFile()
            input_filepath = new_file_object.name
            self.file_object.seek(0)
            shutil.copyfileobj(fsrc=self.file_object, fdst=new_file_object)
            self.file_object.seek(0)
            new_file_object.seek(0)

            image_buffer = io.BytesIO()
            try:
                pdftoppm(input_filepath,
                         f=self.page_number + 1,
                         l=self.page_number + 1,
                         _out=image_buffer)
                image_buffer.seek(0)
                return Image.open(image_buffer)
            finally:
                new_file_object.close()
Ejemplo n.º 4
0
    def _process(self, document_version):
        if self.command_exiftool:
            temporary_fileobject = NamedTemporaryFile()

            try:
                document_version.save_to_file(file_object=temporary_fileobject)
                temporary_fileobject.seek(0)
                try:
                    result = self.command_exiftool(temporary_fileobject.name)
                except sh.ErrorReturnCode_1 as exception:
                    result = json.loads(s=exception.stdout)[0]
                    if result.get('Error', '') == 'Unknown file type':
                        # Not a fatal error
                        return result
                else:
                    return json.loads(s=result.stdout)[0]
            finally:
                temporary_fileobject.close()
        else:
            logger.warning(
                'EXIFTool binary not found, not processing document '
                'version: %s', document_version)
Ejemplo n.º 5
0
    def execute(self, file_object, page_number):
        logger.debug('Parsing PDF page: %d', page_number)

        temporary_file_object = NamedTemporaryFile()
        copyfileobj(fsrc=file_object, fdst=temporary_file_object)
        temporary_file_object.seek(0)

        command = []
        command.append(self.pdftotext_path)
        command.append('-f')
        command.append(str(page_number))
        command.append('-l')
        command.append(str(page_number))
        command.append(temporary_file_object.name)
        command.append('-')

        proc = subprocess.Popen(command,
                                close_fds=True,
                                stderr=subprocess.PIPE,
                                stdout=subprocess.PIPE)
        return_code = proc.wait()
        if return_code != 0:
            logger.error(proc.stderr.readline())
            temporary_file_object.close()

            raise ParserError

        output = proc.stdout.read()
        temporary_file_object.close()

        if output == b'\x0c':
            logger.debug('Parser didn\'t return any output')
            return ''

        if output[-3:] == b'\x0a\x0a\x0c':
            return output[:-3]

        return output
Ejemplo n.º 6
0
    def soffice(self):
        """
        Executes LibreOffice as a sub process
        """
        if not self.command_libreoffice:
            raise OfficeConversionError(
                _('LibreOffice not installed or not found.'))

        with NamedTemporaryFile() as temporary_file_object:
            # Copy the source file object of the converter instance to a
            # named temporary file to be able to pass it to the LibreOffice
            # execution.
            self.file_object.seek(0)
            shutil.copyfileobj(fsrc=self.file_object,
                               fdst=temporary_file_object)
            self.file_object.seek(0)
            temporary_file_object.seek(0)

            libreoffice_home_directory = mkdtemp()
            args = (
                temporary_file_object.name,
                '--outdir',
                setting_temporary_directory.value,
                '-env:UserInstallation=file://{}'.format(
                    os.path.join(libreoffice_home_directory,
                                 'LibreOffice_Conversion')),
            )

            kwargs = {'_env': {'HOME': libreoffice_home_directory}}

            if self.mime_type == 'text/plain':
                kwargs.update({'infilter': 'Text (encoded):UTF8,LF,,,'})

            try:
                self.command_libreoffice(*args, **kwargs)
            except sh.ErrorReturnCode as exception:
                temporary_file_object.close()
                raise OfficeConversionError(exception)
            except Exception as exception:
                temporary_file_object.close()
                logger.error('Exception launching Libre Office; %s', exception)
                raise
            finally:
                fs_cleanup(filename=libreoffice_home_directory)

            # LibreOffice return a PDF file with the same name as the input
            # provided but with the .pdf extension.

            # Get the converted output file path out of the temporary file
            # name plus the temporary directory

            filename, extension = os.path.splitext(
                os.path.basename(temporary_file_object.name))

            logger.debug('filename: %s', filename)
            logger.debug('extension: %s', extension)

            converted_file_path = os.path.join(
                setting_temporary_directory.value,
                os.path.extsep.join((filename, 'pdf')))
            logger.debug('converted_file_path: %s', converted_file_path)

        # Don't use context manager with the NamedTemporaryFile on purpose
        # so that it is deleted when the caller closes the file and not
        # before.

        temporary_converted_file_object = NamedTemporaryFile()

        # Copy the LibreOffice output file to a new named temporary file
        # and delete the converted file
        with open(converted_file_path, mode='rb') as converted_file_object:
            shutil.copyfileobj(fsrc=converted_file_object,
                               fdst=temporary_converted_file_object)
        fs_cleanup(filename=converted_file_path)
        temporary_converted_file_object.seek(0)
        return temporary_converted_file_object