def match(cls, file_path, entity):
    """Score whether this class is a candidate for handling ``entity``.

    The values returned by ``entity.get("mimeType")`` are compared against
    the normalised ``cls.MIME_TYPES``; if none of them matches, the
    normalised extension of each ``entity.get("fileName")`` value is
    compared against the normalised ``cls.EXTENSIONS``.

    ``file_path`` is accepted but not used by this implementation.

    Returns ``cls.SCORE`` on the first match, otherwise ``-1``.
    """
    # Declared MIME types that fail to normalise are ignored.
    accepted_types = []
    for declared in cls.MIME_TYPES:
        normalized = normalize_mimetype(declared, default=None)
        if normalized is not None:
            accepted_types.append(normalized)
    if any(candidate in accepted_types for candidate in entity.get("mimeType")):
        return cls.SCORE

    accepted_extensions = [normalize_extension(ext) for ext in cls.EXTENSIONS]
    for name in entity.get("fileName"):
        ext = normalize_extension(name)
        if ext is None:
            # A file name without a usable extension can never match.
            continue
        if ext in accepted_extensions:
            return cls.SCORE
    return -1
def load_mime_extensions():
    """Build a mapping of normalised media type -> normalised extension.

    Every file listed in ``FILES`` is parsed as XML. For each node selected
    by the TypeDetection XPath below, the ``./prop`` children are collected
    into a dict keyed by their ``NAME`` attribute (the text of the last
    ``./value`` child wins). Nodes lacking a usable ``MediaType`` or an
    ``Extensions`` property are skipped; otherwise the first
    space-separated entry of ``Extensions`` is normalised and recorded.
    Later nodes overwrite earlier ones for the same media type.
    """
    type_nodes = './*[@oor:package="org.openoffice.TypeDetection"]/node/node'
    mapping = {}
    for xcd_file in FILES:
        tree = etree.parse(xcd_file)
        for type_node in tree.xpath(type_nodes, namespaces=NS):
            props = {}
            for prop in type_node.findall('./prop'):
                key = prop.get(NAME)
                for value in prop.findall('./value'):
                    props[key] = value.text

            media_type = normalize_mimetype(props.get('MediaType'),
                                            default=None)
            raw_extensions = props.get('Extensions')
            if media_type is None or raw_extensions is None:
                continue

            # Only the first listed extension is considered.
            first_extension = raw_extensions.partition(' ')[0]
            normalized = normalize_extension(first_extension)
            if normalized is not None:
                mapping[media_type] = normalized
    return mapping
def match(cls, file_path, result=None):
    """Score whether this class is a candidate for handling ``result``.

    The normalised ``result.mime_type`` is compared against the normalised
    ``cls.MIME_TYPES``; failing that, the normalised extension of
    ``result.file_name`` is compared against the normalised
    ``cls.EXTENSIONS``. Entries that fail to normalise are ignored on both
    sides, so a ``None`` value never counts as a match.

    ``file_path`` is accepted but not used by this implementation.

    Returns ``cls.SCORE`` on a match, otherwise ``-1``. When ``result`` is
    omitted (the default ``None``) there is nothing to inspect, so ``-1``
    is returned instead of failing on the attribute access.
    """
    if result is None:
        return -1

    mime_types = [
        normalize_mimetype(m, default=None) for m in cls.MIME_TYPES
    ]
    mime_types = [m for m in mime_types if m is not None]
    mime_type = normalize_mimetype(result.mime_type, default=None)
    if mime_type in mime_types:
        return cls.SCORE

    extensions = [normalize_extension(e) for e in cls.EXTENSIONS]
    extensions = [e for e in extensions if e is not None]
    extension = normalize_extension(result.file_name)
    if extension in extensions:
        return cls.SCORE

    return -1
def parse_extensions(self, extensions):
    """Yield the normalised extensions found in a space-separated string.

    ``extensions`` may be ``None``, in which case nothing is yielded.
    The wildcard token ``'*'`` is skipped, as is any token that
    ``normalize_extension`` maps to ``None``.
    """
    if extensions is None:
        return
    for token in extensions.split(' '):
        if token != '*':
            normalized = normalize_extension(token)
            if normalized is not None:
                yield normalized
async def convert(request):
    """Convert the uploaded ``file`` form field to PDF and stream it back.

    The upload is spooled to a temporary file, handed to
    ``converter.convert_file`` together with the filters selected by
    ``FORMATS.get_filters`` for its extension / MIME type, and the
    resulting PDF is streamed to the client in ``BUFFER_SIZE`` chunks.

    Query parameters:
        timeout: conversion timeout passed to the converter
            (integer, default 300).

    Responses:
        200: the PDF body (``application/pdf``).
        400: the ``timeout`` parameter is not an integer, or the
            conversion raised ``ConversionFailure``.
        500: any other error; the converter is terminated in that case.

    Both temporary files are removed before the handler returns.
    """
    data = await request.post()
    upload = data['file']
    extension = normalize_extension(upload.filename)
    mime_type = normalize_mimetype(upload.content_type, default=None)
    log.info('PDF convert: %s [%s]', upload.filename, mime_type)

    # Validate client input up front: a malformed value is the caller's
    # fault and must not reach the generic handler below, which would
    # terminate the shared converter.
    try:
        timeout = int(request.query.get('timeout', 300))
    except ValueError:
        return web.Response(text="Invalid timeout parameter.", status=400)

    fd, upload_file = mkstemp()
    os.close(fd)
    fd, out_file = mkstemp(suffix='.pdf')
    os.close(fd)
    log.info('Source: %s, target: %s', upload_file, out_file)

    response = None
    try:
        with open(upload_file, 'wb') as fh:
            shutil.copyfileobj(upload.file, fh, BUFFER_SIZE)

        filters = list(FORMATS.get_filters(extension, mime_type))
        await asyncio.sleep(0)
        converter.convert_file(upload_file, out_file, filters,
                               timeout=timeout)
        out_size = os.path.getsize(out_file)
        if out_size == 0:
            raise ConversionFailure("No PDF version was generated.")
        await asyncio.sleep(0)

        response = web.StreamResponse()
        response.content_length = out_size
        response.content_type = 'application/pdf'
        await response.prepare(request)
        with open(out_file, 'rb') as f:
            while True:
                chunk = f.read(BUFFER_SIZE)
                if not chunk:
                    break
                await response.write(chunk)
        return response
    except ConversionFailure as fail:
        log.info("Failed to convert: %s", fail)
        return web.Response(text=str(fail), status=400)
    except Exception as exc:
        log.exception('System error: %s.', exc)
        converter.terminate()
        if response is not None and response.prepared:
            # Headers are already on the wire; a second response cannot
            # be sent, so let the server abort the exchange.
            raise
        # A handler must return a response object, not fall through
        # with None.
        return web.Response(text="Internal error during conversion.",
                            status=500)
    finally:
        # Remove each file independently so that a failure on the first
        # neither skips the second nor masks the actual response.
        for path in (upload_file, out_file):
            try:
                os.remove(path)
            except OSError as exc:
                log.warning('Could not remove %s: %s', path, exc)
def convert():
    """Convert the uploaded ``file`` to PDF via ``unoconv`` and return it.

    Only one conversion runs at a time: the handler waits up to two
    seconds for ``lock`` and answers ``503 BUSY`` if it cannot get it.
    The upload is written to a temporary file, ``unoconv`` is invoked
    with a ``TIMEOUT``-second limit, and the PDF written to ``OUT_PATH``
    is returned with ``send_file``.

    Responses:
        200: the converted PDF.
        400: ``unoconv`` failed, produced no output, or timed out.
        503: the lock is busy, or the listener process has exited.

    ``app.is_dead`` is set when the listener has exited or a timeout
    occurred. The temporary upload file is always removed and the lock
    always released.
    """
    acquired = lock.acquire(timeout=2)
    if not acquired:
        return ("BUSY", 503)

    upload_file = None
    try:
        # Drop any stale output left behind by a previous conversion.
        if os.path.exists(OUT_PATH):
            os.unlink(OUT_PATH)

        upload = request.files['file']
        extension = normalize_extension(upload.filename)
        mime_type = normalize_mimetype(upload.mimetype, default=None)
        if extension is None:
            extension = extensions.get(mime_type)
        log.info('PDF convert: %s [%s]', upload.filename, mime_type)

        # Without a known extension use no suffix at all, rather than
        # the literal ".None" that string formatting would produce.
        suffix = '.%s' % extension if extension is not None else ''
        fd, upload_file = mkstemp(suffix=suffix)
        with os.fdopen(fd, mode='wb') as fh:
            upload.save(fh)

        if listener.poll() is not None:
            log.error("Listener has terminated.")
            app.is_dead = True
            return ("DEAD", 503)

        args = [
            'unoconv',
            '-f', 'pdf',
            '-o', OUT_PATH,
            '-i', 'MacroExecutionMode=0',
            '-i', 'ReadOnly=1',
            '-e', 'SelectPdfVersion=1',
            '-e', 'MaxImageResolution=300',
            '--no-launch',
            upload_file,
        ]
        err = subprocess.call(args, timeout=TIMEOUT)
        if err != 0 or not os.path.exists(OUT_PATH):
            return ('The document could not be converted to PDF.', 400)
        return send_file(OUT_PATH)
    except subprocess.TimeoutExpired:
        log.error("Timeout exceeded: %s", upload.filename)
        app.is_dead = True
        return ('Processing the document timed out.', 400)
    finally:
        # mkstemp leaves deletion to the caller; without this every
        # request would leak one temporary file.
        if upload_file is not None:
            try:
                os.remove(upload_file)
            except OSError as exc:
                log.warning('Could not remove %s: %s', upload_file, exc)
        lock.release()