def __init__(self, filename, basename, tmp_dir, parent):
    """Open *filename* as a document, working on a private copy in *tmp_dir*.

    PDF files are copied into ``tmp_dir`` (unless they already live there,
    see below) and loaded via ``self.__from_file``. Image files are
    converted to PDF with img2pdf, written into ``tmp_dir`` and loaded with
    Poppler.

    Args:
        filename: Path of the PDF or image file to open.
        basename: Name to record for the document. ``None`` when importing
            a file (the name is then taken from ``filename``); otherwise
            the name supplied by a copy-paste operation.
        tmp_dir: Directory in which the working copy is created.
        parent: Passed through unchanged to ``self.__from_file``.

    Raises:
        PDFDocError: If the file type cannot be guessed, is neither PDF nor
            image, is an image while img2pdf is unavailable or does not
            support the format, or if loading the PDF raises ``GLib.Error``.
    """
    self.render_lock = threading.Lock()
    self.filename = os.path.abspath(filename)
    self.mtime = os.path.getmtime(filename)
    if basename is None:
        # When importing files
        self.basename = os.path.basename(filename)
    else:
        # When copy-pasting
        self.basename = basename
    self.password = ""

    filemime = mimetypes.guess_type(self.filename)[0]
    if not filemime:
        raise PDFDocError(_("Unknown file format"))

    if filemime == "application/pdf":
        if self.filename.startswith(tmp_dir) and basename is None:
            # In the "Insert Blank Page" case the file already lives in
            # tmp_dir, so we don't need to copy self.filename.
            self.copyname = self.filename
            self.basename = ""
        else:
            fd, self.copyname = tempfile.mkstemp(suffix=".pdf", dir=tmp_dir)
            os.close(fd)
            shutil.copy(self.filename, self.copyname)
        try:
            self.__from_file(parent, self.basename)
        except GLib.Error as e:
            # Chain the original error so the root cause stays visible.
            raise PDFDocError(e.message + ": " + filename) from e
    elif filemime.split("/")[0] == "image":
        if not img2pdf:
            raise PDFDocError(
                _("Image files are only supported with img2pdf"))
        # Reuse the MIME type guessed above instead of guessing it again.
        if filemime not in img2pdf_supported_img:
            raise PDFDocError(
                _("Image format is not supported by img2pdf"))

        fd, self.copyname = tempfile.mkstemp(suffix=".pdf", dir=tmp_dir)
        os.close(fd)

        # The context manager guarantees the source image file is closed
        # even when the pixel data is never loaded (non-transparent path).
        with img2pdf.Image.open(filename) as source:
            img = source
            if img.mode != "RGBA" and "transparency" in img.info:
                # TODO: Find a way to keep image in P or L format and
                # remove transparency. This will work but converting
                # from 1, L, P to RGB is not optimal.
                img = img.convert("RGBA")
            if img.mode == "RGBA":
                # Flatten onto a white background, using the alpha band
                # as paste mask, then hand the result to img2pdf as an
                # in-memory PNG.
                bg = img2pdf.Image.new("RGB", img.size, (255, 255, 255))
                bg.paste(img, mask=img.split()[-1])
                imgio = img2pdf.BytesIO()
                bg.save(imgio, "PNG")
                imgio.seek(0)
                pdf_bytes = img2pdf.convert(imgio)
            else:
                pdf_bytes = img2pdf.convert(filename)

        with open(self.copyname, "wb") as f:
            f.write(pdf_bytes)

        uri = pathlib.Path(self.copyname).as_uri()
        self.document = Poppler.Document.new_from_file(uri, None)
    else:
        raise PDFDocError(_("File is neither pdf nor image"))
async def download_images(session, directory, links, *, max_size=5_000_000):
    """Download every link and store it as an RGB JPEG file in *directory*.

    Each link is first probed with a HEAD request; links whose advertised
    ``Content-Length`` exceeds ``max_size`` are skipped. Because that header
    may be absent or understated, the size of the downloaded body is checked
    against ``max_size`` as well before the image is decoded.

    Args:
        session: HTTP client whose ``head()`` and ``get()`` return async
            context managers (presumably an ``aiohttp.ClientSession`` --
            confirm against the caller).
        directory: Directory the JPEG files are written to. It is not
            created here.
        links: Iterable of image URLs.
        max_size: Largest accepted image size in bytes (default 5 MB).

    Returns:
        List of the written file paths, in link order. Files are named
        ``<index>.jpg`` after the link's position, so skipped links leave
        gaps in the numbering.
    """
    # Local import: use the stdlib class directly instead of relying on
    # img2pdf happening to re-export it.
    from io import BytesIO

    files = []
    for i, link in enumerate(links):
        async with session.head(link, allow_redirects=True) as resp:
            # -1 stands for "size not advertised".
            size = int(resp.headers.get('Content-Length', -1))
        if size > max_size:
            continue

        async with session.get(link) as resp:
            content = await resp.content.read()
        # Enforce the limit on the real payload too, since the HEAD
        # response may have lacked (or misreported) Content-Length.
        if len(content) > max_size:
            continue

        image_filename = f'{directory}/{i}.jpg'
        # Every image is re-encoded as RGB JPEG; the original author noted
        # an unexplained ExifOrientationError with PNG input.
        image = Image.open(BytesIO(content)).convert('RGB')
        image.save(image_filename, format='JPEG')
        files.append(image_filename)
    return files