def _init_context(self, workdir: pathlib.Path):
    """Set up the shared ``Context`` and the PDF watermarker.

    The context receives this object's configuration, the given working
    directory, and freshly constructed database and S3 storage handles.
    The watermarker is then initialised from the ``experimental`` section
    of the same configuration.

    :param workdir: working directory handed over to ``Context``
    """
    cfg = self.config
    database = Database(cfg=cfg.db)
    storage = S3Storage(cfg=cfg.s3)
    Context.initialize(
        config=cfg,
        workdir=workdir,
        db=database,
        s3=storage,
    )
    experimental = cfg.experimental
    PdfWaterMarker.initialize(
        watermark_filename=experimental.pdf_watermark,
        watermark_top=experimental.pdf_watermark_top,
    )
def timeout_exceeded(job_id: str):
    """Report that a job ran past the configured time limit.

    Does nothing when no job timeout is configured (``None``).

    :param job_id: identifier attached to the raised exception
    :raises JobException: when a job timeout is configured
    """
    job_timeout = Context.get().app.cfg.experimental.job_timeout
    if job_timeout is not None:
        raise JobException(
            job_id=job_id,
            msg=f'Document generation exceeded time limit '
                f'({job_timeout} seconds).',
        )
def prepare_template(self, app_uuid: str, template_id: str) -> Template:
    """Load a template from the database and register or refresh it.

    The template record, its files and its assets are fetched and bundled
    into a ``TemplateComposite``. An already-known template is refreshed
    with it; otherwise a new one is initialised.

    :param app_uuid: application identifier used for the lookup
    :param template_id: identifier of the template to prepare
    :return: the template as returned by ``get_template``
    :raises RuntimeError: when the template record is not in the database
    """
    db = Context.get().app.db
    lookup = {'template_id': template_id, 'app_uuid': app_uuid}

    record = db.fetch_template(**lookup)
    if record is None:
        raise RuntimeError(f'Template {template_id} not found in database')

    files = db.fetch_template_files(**lookup)
    assets = db.fetch_template_assets(**lookup)
    composite = TemplateComposite(
        db_template=record,
        db_files={item.uuid: item for item in files},
        db_assets={item.uuid: item for item in assets},
    )

    # Known templates are refreshed in place, unknown ones are created.
    if self.has_template(app_uuid, template_id):
        apply_composite = self._refresh_template
    else:
        apply_composite = self._init_new_template
    apply_composite(app_uuid, template_id, composite)

    return self.get_template(app_uuid, template_id)
def check_doc_size(job_id: str, doc_size: int):
    """Enforce the configured maximum document size.

    Passes silently when no limit is configured (``None``) or when the
    document does not exceed it.

    :param job_id: identifier attached to the raised exception
    :param doc_size: size of the document, formatted via
        ``byte_size_format`` in the error message
    :raises JobException: when ``doc_size`` is above the limit
    """
    max_size = Context.get().app.cfg.experimental.max_doc_size
    within_limit = max_size is None or doc_size <= max_size
    if not within_limit:
        raise JobException(
            job_id=job_id,
            msg=f'Document exceeded size limit ({byte_size_format(max_size)}): '
                f'{byte_size_format(doc_size)}.',
        )
def check_format(job_id: str, doc_format: Format,
                 app_config: Optional[DBAppConfig]):
    """Reject non-PDF formats when PDF-only mode is in effect.

    PDF-only mode is on when the global experimental flag is truthy or,
    if an application config is supplied, when its ``feature_pdf_only``
    flag is truthy.

    :param job_id: identifier attached to the raised exception
    :param doc_format: requested format; only ``is_pdf`` is inspected
    :param app_config: optional per-application configuration
    :raises JobException: when PDF-only applies and the format is not PDF
    """
    restricted = Context.get().app.cfg.experimental.pdf_only
    # The per-application flag matters only if the global one is falsy.
    if app_config is not None and not restricted:
        restricted = app_config.feature_pdf_only
    if restricted and not doc_format.is_pdf:
        raise JobException(job_id=job_id,
                           msg='Only PDF documents are allowed.')
def _store_asset(self, asset: DBTemplateAsset):
    """Download one template asset from S3 into the template directory.

    The remote object is ``<asset_prefix>/<asset uuid>``; the local target
    is ``template_dir / asset.file_name``, with parent directories created
    as needed. A falsy download result is logged as an error and not
    raised.

    :param asset: asset record providing ``uuid`` and ``file_name``
    """
    Context.logger.debug(f'Storing asset {asset.uuid} ({asset.file_name})')
    remote_path = f'{self.asset_prefix}/{asset.uuid}'
    local_path = self.template_dir / asset.file_name
    local_path.parent.mkdir(parents=True, exist_ok=True)

    storage = Context.get().app.s3
    if storage.download_file(remote_path, local_path):
        return
    Context.logger.error(
        f'Asset "{local_path.name}" cannot be retrieved')
def __init__(self, template, options: dict):
    """Configure the step with an RDF converter and validated formats.

    The input and output formats are resolved through ``FileFormats.get``
    from ``options[OPTION_FROM]`` and ``options[OPTION_TO]``. Each must be
    a member of ``INPUT_FORMATS`` / ``OUTPUT_FORMATS`` respectively;
    otherwise ``raise_exc`` is called with a descriptive message.

    :param template: passed through to the parent constructor
    :param options: step options; must contain both format keys
    """
    super().__init__(template, options)
    app_cfg = Context.get().app.cfg
    self.rdflib_convert = RdfLibConvert(config=app_cfg)

    requested_from = options[self.OPTION_FROM]
    self.input_format = FileFormats.get(requested_from)
    requested_to = options[self.OPTION_TO]
    self.output_format = FileFormats.get(requested_to)

    input_supported = self.input_format in self.INPUT_FORMATS
    if not input_supported:
        self.raise_exc(f'Unknown input format "{self.input_format.name}"')
    output_supported = self.output_format in self.OUTPUT_FORMATS
    if not output_supported:
        self.raise_exc(
            f'Unknown output format "{self.output_format.name}"')
def _run(self):
    """Run the job pipeline under the configured job timeout.

    After fetching the document, the template preparation, document build
    and document storage run inside the ``timeout`` context manager. A
    ``TimeoutError`` raised there is handed to
    ``LimitsEnforcer.timeout_exceeded``. ``finalize`` runs afterwards.
    """
    self.get_document()
    try:
        time_limit = Context.get().app.cfg.experimental.job_timeout
        with timeout(time_limit):
            # Order matters: each step relies on the previous one.
            pipeline = (
                self.prepare_template,
                self.build_document,
                self.store_document,
            )
            for step in pipeline:
                step()
    except TimeoutError:
        LimitsEnforcer.timeout_exceeded(job_id=self.doc_uuid)
    self.finalize()
def __init__(self, app_uuid: str, template_dir: pathlib.Path,
             db_template: TemplateComposite):
    """Create a template bound to an application and a local directory.

    :param app_uuid: application identifier; it also prefixes the asset
        path when the cloud ``multi_tenant`` setting is truthy
    :param template_dir: local directory of this template
    :param db_template: composite of the database template data
    """
    template_id = db_template.template.id

    self.app_uuid = app_uuid
    self.template_dir = template_dir
    # Naive timestamp in UTC, as produced by ``utcnow``.
    self.last_used = datetime.datetime.utcnow()
    self.db_template = db_template
    self.template_id = template_id
    self.formats = {}  # type: dict[str, Format]

    # Remote asset location; namespaced per application when multi-tenant.
    prefix = f'templates/{template_id}'
    if Context.get().app.cfg.cloud.multi_tenant:
        prefix = f'{app_uuid}/{prefix}'
    self.asset_prefix = prefix
def _init_new_template(self, app_uuid: str, template_id: str,
                       db_template: TemplateComposite):
    """Create, prepare and register a template that is not yet known.

    The template directory is ``<workdir>/<app_uuid>/<template id>``,
    with every ``:`` in the template id replaced by ``_``.

    :param app_uuid: application identifier
    :param template_id: identifier under which the template is registered
    :param db_template: composite of the database template data
    """
    base_dir = Context.get().app.workdir
    dir_name = template_id.replace(':', '_')
    new_template = Template(
        app_uuid=app_uuid,
        template_dir=base_dir / app_uuid / dir_name,
        db_template=db_template,
    )
    new_template.prepare_fs()
    self._set_template(app_uuid, template_id, new_template)
def __init__(self, db_job: DBJob):
    """Create a job from its database record.

    :param db_job: record supplying ``app_uuid``, ``document_uuid`` and
        ``document_context``
    """
    # Shared runtime handles.
    self.ctx = Context.get()
    self.log = Context.logger

    # Values copied from the job record.
    self.app_uuid = db_job.app_uuid
    self.doc_uuid = db_job.document_uuid
    self.doc_context = db_job.document_context

    # Start empty; nothing in this constructor fills these in.
    self.template = None
    self.format = None
    self.doc = None  # type: Optional[DBDocument]
    self.final_file = None  # type: Optional[DocumentFile]
    self.app_config = None  # type: Optional[DBAppConfig]
    self.app_limits = None  # type: Optional[DBAppLimits]
def _work(self):
    """Try to fetch and process a single job from the queue.

    A fresh trace id is set, ``Database.SELECT_JOB`` is executed, and if
    it yields exactly one row, that row is turned into a ``Job`` and run.
    Afterwards the job is deleted with ``Database.DELETE_JOB`` and the
    transaction on the query connection is committed.

    The cursor is closed on every exit path, including the "no job" early
    return and any exception raised while running the job, deleting it or
    committing. Exceptions still propagate to the caller.

    :return: ``True`` when a job was processed, ``False`` when the select
        did not return exactly one row
    """
    Context.update_trace_id(str(uuid.uuid4()))
    ctx = Context.get()
    Context.logger.debug('Trying to fetch a new job')
    cursor = ctx.app.db.conn_query.new_cursor(use_dict=True)
    try:
        cursor.execute(Database.SELECT_JOB)
        result = cursor.fetchall()
        if len(result) != 1:
            Context.logger.debug(f'Fetched {len(result)} jobs')
            return False

        db_job = Database.get_as_job(result[0])
        Context.update_document_id(db_job.document_uuid)
        Context.logger.info(f'Fetched job #{db_job.id}')

        job = Job(db_job=db_job)
        job.run()

        Context.logger.debug('Working done, deleting job from queue')
        cursor.execute(query=Database.DELETE_JOB, vars=(db_job.id, ))
        Context.logger.info('Committing transaction')
        ctx.app.db.conn_query.connection.commit()
    finally:
        # Always release the cursor; previously it was closed only on the
        # fully successful path.
        cursor.close()
    job.log.info('Job processing finished')
    return True
def store_document(self, app_uuid: str, file_name: str,
                   content_type: str, data: bytes):
    """Upload document bytes to the configured bucket.

    The object name is ``<DOCUMENTS_DIR>/<file_name>``, additionally
    prefixed with ``<app_uuid>/`` when the cloud ``multi_tenant`` setting
    is truthy. The data is passed to the client through the file object
    yielded by ``temp_binary_file``.

    :param app_uuid: application identifier (used as multi-tenant prefix)
    :param file_name: name of the stored object within the documents dir
    :param content_type: content type recorded with the object
    :param data: raw document content
    """
    document_key = f'{DOCUMENTS_DIR}/{file_name}'
    multi_tenant = Context.get().app.cfg.cloud.multi_tenant
    object_name = f'{app_uuid}/{document_key}' if multi_tenant else document_key

    with temp_binary_file(data=data) as file:
        self.client.put_object(
            bucket_name=self.cfg.bucket,
            object_name=object_name,
            data=file,
            length=len(data),
            content_type=content_type,
        )
def _add_j2_enhancements(self):
    """Register custom filters, tests and globals on the Jinja2 environment.

    Filters and tests are always added. Globals are added only when a
    template-specific configuration exists: ``secrets`` always, and
    ``requests`` (a ``RequestsWrapper``) when requests are enabled in that
    configuration.
    """
    from document_worker.templates.filters import filters
    from document_worker.templates.tests import tests
    from document_worker.model.http import RequestsWrapper

    env = self.j2_env
    env.filters.update(filters)
    env.tests.update(tests)

    template_cfg = Context.get().app.cfg.templates.get_config(
        self.template.template_id,
    )
    if template_cfg is None:
        return

    extra_globals = {'secrets': template_cfg.secrets}
    if template_cfg.requests.enabled:
        extra_globals['requests'] = RequestsWrapper(
            template_cfg=template_cfg,
        )
    env.globals.update(extra_globals)
def run(self):
    """Listen on the job queue and process jobs until interrupted.

    Each cycle first drains available jobs by calling ``_work`` until it
    returns a falsy value, then waits on the queue connection with
    ``select`` for up to the configured queue timeout and collects any
    notifications. The ``INTERRUPTED`` flag is checked once at the end of
    every cycle.
    """
    ctx = Context.get()
    Context.logger.info('Preparing to listen for document jobs')
    queue_conn = ctx.app.db.conn_queue
    with queue_conn.new_cursor() as cursor:
        cursor.execute(Database.LISTEN)
        queue_conn.listening = True
        Context.logger.info('Listening on document job queue')

        notifications = []
        timeout = ctx.app.cfg.db.queue_timout

        Context.logger.info(
            'Entering working cycle, waiting for notifications')
        while True:
            # Process everything that is currently available.
            while self._work():
                pass

            Context.logger.debug('Waiting for new notifications')
            notifications.clear()
            # Re-issue LISTEN if the connection lost its listening state.
            if not queue_conn.listening:
                cursor.execute(Database.LISTEN)
                queue_conn.listening = True

            ready = select.select([queue_conn.connection], [], [], timeout)

            if ready != ([], [], []):
                queue_conn.connection.poll()
                while queue_conn.connection.notifies:
                    notifications.append(
                        queue_conn.connection.notifies.pop())
                Context.logger.info(
                    f'Notifications received ({len(notifications)})')
                Context.logger.debug(f'Notifications: {notifications}')
            else:
                Context.logger.debug(
                    f'Nothing received in this cycle '
                    f'(timeouted after {timeout} seconds.')

            if INTERRUPTED:
                Context.logger.debug(
                    'Interrupt signal received, ending...')
                break
def name_document(cls, document_metadata: DBDocument,
                  document_file: DocumentFile) -> str:
    """Build the file name for a document using the configured strategy.

    The strategy is looked up in ``_STRATEGIES`` by the configured
    ``doc.naming_strategy``; ``_FALLBACK`` is used for unknown keys. Its
    result for the metadata is passed to ``document_file.filename``.

    :param document_metadata: document record given to the strategy
    :param document_file: file object that produces the final name
    :return: the value returned by ``document_file.filename``
    """
    strategy_key = Context.get().app.cfg.doc.naming_strategy
    make_name = cls._STRATEGIES.get(strategy_key, cls._FALLBACK)
    return document_file.filename(make_name(document_metadata))
def make_watermark(doc_pdf: bytes,
                   app_config: Optional[DBAppConfig]) -> bytes:
    """Apply the PDF watermark when it is configured and enabled.

    The document is returned unchanged unless a watermark is configured,
    an application config is given, and its ``feature_pdf_watermark`` flag
    is truthy.

    :param doc_pdf: PDF content
    :param app_config: optional per-application configuration
    :return: result of ``PdfWaterMarker.create_watermark`` or ``doc_pdf``
    """
    watermark = Context.get().app.cfg.experimental.pdf_watermark
    should_mark = (
        watermark is not None
        and app_config is not None
        and app_config.feature_pdf_watermark
    )
    if should_mark:
        return PdfWaterMarker.create_watermark(doc_pdf=doc_pdf)
    return doc_pdf
def __init__(self, template, options: dict):
    """Configure the step with a ``WkHtmlToPdf`` helper.

    :param template: passed through to the parent constructor
    :param options: passed through to the parent constructor
    """
    super().__init__(template, options)
    app_cfg = Context.get().app.cfg
    self.wkhtmltopdf = WkHtmlToPdf(config=app_cfg)