def get_mimetype(file_object, mimetype_only=False):
    """
    Determine a file's mimetype by calling the system's libmagic library
    via python-magic.

    The content of ``file_object`` is copied into a named temporary file
    because the detection is done by file name (``Magic.from_file``).
    ``file_object`` is rewound to position 0 before and after the copy.

    :param file_object: seekable, readable file-like object to inspect.
    :param mimetype_only: when true, only the mimetype is requested and
        the returned encoding is ``None``.
    :return: a ``(mimetype, encoding)`` tuple. ``encoding`` is ``None`` when
        ``mimetype_only`` is true or when the detection result carries no
        ``'; charset='`` part.
    """
    file_mime_encoding = None

    kwargs = {'mime': True}
    if not mimetype_only:
        kwargs['mime_encoding'] = True

    # The context manager guarantees the temporary file is closed (and so
    # removed) even when seeking or copying the source raises.
    with NamedTemporaryFile() as temporary_file_object:
        file_object.seek(0)
        copyfileobj(fsrc=file_object, fdst=temporary_file_object)
        file_object.seek(0)
        # Seeking flushes the buffered writes so the data is on disk before
        # the file is opened again by name.
        temporary_file_object.seek(0)

        mime = magic.Magic(**kwargs)
        detected = mime.from_file(filename=temporary_file_object.name)

    if mimetype_only:
        return detected, file_mime_encoding

    # partition() tolerates a result without a charset part instead of
    # failing on tuple unpacking.
    file_mimetype, separator, encoding = detected.partition('; charset=')
    if separator:
        file_mime_encoding = encoding

    return file_mimetype, file_mime_encoding
def verify_file(self, file_object, signature_file=None, all_keys=False, key_fingerprint=None, key_id=None):
    """
    Verify the signature of a file, either embedded or detached.

    :param file_object: file-like object with the (possibly signed) data.
    :param signature_file: optional file-like object holding a detached
        signature for ``file_object``.
    :param all_keys: forwarded to ``self._preload_keys``.
    :param key_fingerprint: forwarded to ``self._preload_keys``.
    :param key_id: forwarded to ``self._preload_keys``.
    :return: a ``SignatureVerification`` built from the attributes of the
        backend verification result.
    :raises VerificationError: when the backend result is falsy, is not a
        retryable "no public key" result and has no key id.
    """
    keys = self._preload_keys(
        all_keys=all_keys, key_fingerprint=key_fingerprint, key_id=key_id
    )

    if signature_file:
        # Save the original data and invert the argument order
        # Signature first, file second
        #
        # Both scratch objects are managed by ``with`` so they are released
        # even when the copy or the backend call raises.
        with NamedTemporaryFile() as temporary_file_object:
            shutil.copyfileobj(fsrc=file_object, fdst=temporary_file_object)
            # Seeking flushes the buffered data so the backend can read the
            # file through its name.
            temporary_file_object.seek(0)

            with io.BytesIO(signature_file.read()) as signature_file_buffer:
                # Leave the caller's signature file rewound; the retry
                # below reads it again.
                signature_file.seek(0)

                verify_result = gpg_backend.verify_file(
                    file_object=signature_file_buffer,
                    data_filename=temporary_file_object.name, keys=keys
                )
    else:
        verify_result = gpg_backend.verify_file(
            file_object=file_object, keys=keys
        )

    logger.debug('verify_result.status: %s', verify_result.status)

    if verify_result:
        # Signed and key present
        logger.debug(msg='signed and key present')
        return SignatureVerification(verify_result.__dict__)
    elif verify_result.status == 'no public key' and not (
        key_fingerprint or all_keys or key_id
    ):
        # Signed but key not present, retry with key fetch
        logger.debug(msg='no public key')
        file_object.seek(0)
        return self.verify_file(
            file_object=file_object, signature_file=signature_file,
            key_id=verify_result.key_id
        )
    elif verify_result.key_id:
        # Signed, retried and key still not found
        logger.debug(msg='signed, retried and key still not found')
        return SignatureVerification(verify_result.__dict__)
    else:
        logger.debug(msg='file not signed')
        raise VerificationError('File not signed')
def convert(self, *args, **kwargs):
    """
    Render one page of a PDF document as an image.

    The parent ``convert`` is always called first with the same arguments.
    When ``self.mime_type`` is ``'application/pdf'`` and ``pdftoppm`` is
    available, the content of ``self.file_object`` is copied to a named
    temporary file, ``pdftoppm`` is run on it for the single page
    ``self.page_number + 1`` and its output is opened with ``Image.open``.

    :return: the object returned by ``Image.open`` for the rendered page,
        or ``None`` when the PDF branch does not apply.
    """
    super(Python, self).convert(*args, **kwargs)

    if self.mime_type == 'application/pdf' and pdftoppm:
        # The ``with`` block spans the whole lifetime of the temporary
        # file, so it is also released when seeking or copying the source
        # fails, not only when ``pdftoppm`` does.
        with NamedTemporaryFile() as new_file_object:
            self.file_object.seek(0)
            shutil.copyfileobj(fsrc=self.file_object, fdst=new_file_object)
            self.file_object.seek(0)
            # Seeking flushes the copy to disk before the file is handed
            # over by name.
            new_file_object.seek(0)

            image_buffer = io.BytesIO()
            # First and last page are the same: a single page is rendered.
            target_page = self.page_number + 1
            pdftoppm(
                new_file_object.name, f=target_page, l=target_page,
                _out=image_buffer
            )
            image_buffer.seek(0)
            return Image.open(image_buffer)
def _process(self, document_version):
    """
    Run ``self.command_exiftool`` on a document version and return the
    first entry of its JSON output.

    The document version is written to a named temporary file whose path
    is passed to the command. When the command fails with
    ``sh.ErrorReturnCode_1`` the JSON found in the exception's stdout is
    returned only if its ``Error`` value is ``'Unknown file type'``; any
    other such failure yields ``None``. When no command is configured a
    warning is logged and ``None`` is returned.
    """
    if not self.command_exiftool:
        logger.warning(
            'EXIFTool binary not found, not processing document '
            'version: %s', document_version
        )
        return None

    with NamedTemporaryFile() as scratch_file:
        document_version.save_to_file(file_object=scratch_file)
        scratch_file.seek(0)

        try:
            completed = self.command_exiftool(scratch_file.name)
        except sh.ErrorReturnCode_1 as exception:
            failure_report = json.loads(s=exception.stdout)[0]
            if failure_report.get('Error', '') == 'Unknown file type':
                # Not a fatal error
                return failure_report
            return None

        return json.loads(s=completed.stdout)[0]
def execute(self, file_object, page_number):
    """
    Extract the text of a single PDF page with the external program at
    ``self.pdftotext_path``.

    The content of ``file_object`` is copied to a named temporary file and
    the program is invoked with ``-f``/``-l`` both set to ``page_number``,
    the temporary file name and ``-`` as the last argument.

    :param file_object: readable file-like object with the PDF data; it is
        read from its current position.
    :param page_number: page to extract, passed to the program as is.
    :return: ``''`` when the program only printed a form feed
        (``b'\\x0c'``); otherwise the captured standard output as bytes,
        without a trailing ``b'\\n\\n\\x0c'`` if present.
    :raises ParserError: when the program exits with a non zero code.
    """
    logger.debug('Parsing PDF page: %d', page_number)

    # The context manager removes the temporary file on every exit path,
    # including a failure to start the subprocess.
    with NamedTemporaryFile() as temporary_file_object:
        copyfileobj(fsrc=file_object, fdst=temporary_file_object)
        # Seeking flushes the copy to disk before the program opens the
        # file by name.
        temporary_file_object.seek(0)

        command = [
            self.pdftotext_path,
            '-f', str(page_number),
            '-l', str(page_number),
            temporary_file_object.name,
            '-',
        ]

        proc = subprocess.Popen(
            command, close_fds=True, stderr=subprocess.PIPE,
            stdout=subprocess.PIPE
        )
        # communicate() drains both pipes while waiting. Calling wait()
        # first can deadlock once the child fills a pipe buffer.
        output, error_output = proc.communicate()

    if proc.returncode != 0:
        # Log only the first line of stderr, newline included.
        first_line, line_end, _ = error_output.partition(b'\n')
        logger.error(first_line + line_end)
        raise ParserError

    if output == b'\x0c':
        logger.debug('Parser didn\'t return any output')
        return ''

    if output[-3:] == b'\x0a\x0a\x0c':
        return output[:-3]

    return output