Esempio n. 1
0
 def _init_context(self, workdir: pathlib.Path):
     """Initialize the shared ``Context`` and the PDF watermarker.

     A ``Database`` and an ``S3Storage`` are created from the ``db`` and
     ``s3`` sections of ``self.config`` and handed to
     ``Context.initialize`` together with the configuration and
     ``workdir``. ``PdfWaterMarker`` is then initialized from the
     ``experimental`` section.

     :param workdir: working directory passed on to the context
     """
     cfg = self.config
     database = Database(cfg=cfg.db)
     storage = S3Storage(cfg=cfg.s3)
     Context.initialize(
         config=cfg,
         workdir=workdir,
         db=database,
         s3=storage,
     )

     experimental = cfg.experimental
     PdfWaterMarker.initialize(
         watermark_filename=experimental.pdf_watermark,
         watermark_top=experimental.pdf_watermark_top,
     )
Esempio n. 2
0
 def timeout_exceeded(job_id: str):
     """Report that a job exceeded the configured time limit.

     Returns ``None`` without doing anything when
     ``experimental.job_timeout`` is not configured.

     :param job_id: identifier attached to the raised exception
     :raises JobException: when a job timeout is configured
     """
     limit = Context.get().app.cfg.experimental.job_timeout
     if limit is not None:
         message = (f'Document generation exceeded time limit '
                    f'({limit} seconds).')
         raise JobException(job_id=job_id, msg=message)
Esempio n. 3
0
    def prepare_template(self, app_uuid: str, template_id: str) -> Template:
        """Load a template from the database and register or refresh it.

        The template record, its files and its assets are fetched with
        the same ``template_id``/``app_uuid`` query and bundled into a
        ``TemplateComposite``. An already known template is refreshed,
        otherwise a new one is initialized.

        :param app_uuid: application the template belongs to
        :param template_id: identifier of the template to prepare
        :return: result of ``self.get_template(app_uuid, template_id)``
        :raises RuntimeError: when the database returns no template
        """
        database = Context.get().app.db
        lookup = {'template_id': template_id, 'app_uuid': app_uuid}

        record = database.fetch_template(**lookup)
        if record is None:
            raise RuntimeError(f'Template {template_id} not found in database')
        file_rows = database.fetch_template_files(**lookup)
        asset_rows = database.fetch_template_assets(**lookup)

        # Files and assets are indexed by their UUID inside the composite
        files_by_uuid = {row.uuid: row for row in file_rows}
        assets_by_uuid = {row.uuid: row for row in asset_rows}
        composite = TemplateComposite(
            db_template=record,
            db_files=files_by_uuid,
            db_assets=assets_by_uuid,
        )

        if not self.has_template(app_uuid, template_id):
            self._init_new_template(app_uuid, template_id, composite)
        else:
            self._refresh_template(app_uuid, template_id, composite)

        return self.get_template(app_uuid, template_id)
Esempio n. 4
0
 def check_doc_size(job_id: str, doc_size: int):
     """Reject a document that is larger than the configured maximum.

     Nothing happens when ``experimental.max_doc_size`` is not set or
     the document fits within it.

     :param job_id: identifier attached to the raised exception
     :param doc_size: size of the document to check
     :raises JobException: when ``doc_size`` exceeds the limit
     """
     limit = Context.get().app.cfg.experimental.max_doc_size
     if limit is not None and doc_size > limit:
         message = (
             f'Document exceeded size limit ({byte_size_format(limit)}): '
             f'{byte_size_format(doc_size)}.'
         )
         raise JobException(job_id=job_id, msg=message)
Esempio n. 5
0
 def check_format(job_id: str, doc_format: Format,
                  app_config: Optional[DBAppConfig]):
     """Reject non-PDF formats when only PDF output is allowed.

     The restriction comes from ``experimental.pdf_only``; when that is
     falsy and an ``app_config`` is given, its ``feature_pdf_only`` flag
     is used instead.

     :param job_id: identifier attached to the raised exception
     :param doc_format: requested document format
     :param app_config: optional per-application configuration
     :raises JobException: when PDF-only applies and the format is not PDF
     """
     restricted = Context.get().app.cfg.experimental.pdf_only
     if app_config is not None and not restricted:
         # Global setting does not restrict; fall back to the app flag
         restricted = app_config.feature_pdf_only
     if restricted and not doc_format.is_pdf:
         raise JobException(job_id=job_id,
                            msg='Only PDF documents are allowed.')
Esempio n. 6
0
 def _store_asset(self, asset: DBTemplateAsset):
     """Download one template asset from S3 into the template directory.

     The remote object ``<asset_prefix>/<asset uuid>`` is saved as
     ``template_dir / asset.file_name``; missing parent directories are
     created first. A falsy download result is only logged as an error.

     :param asset: asset record providing ``uuid`` and ``file_name``
     """
     Context.logger.debug(f'Storing asset {asset.uuid} ({asset.file_name})')
     source = f'{self.asset_prefix}/{asset.uuid}'
     target = self.template_dir / asset.file_name
     target.parent.mkdir(parents=True, exist_ok=True)
     if Context.get().app.s3.download_file(source, target):
         return
     Context.logger.error(
         f'Asset "{target.name}" cannot be retrieved')
0
 def __init__(self, template, options: dict):
     """Set up the converter and validate the requested formats.

     :param template: forwarded unchanged to the parent initializer
     :param options: step options; must contain the keys named by
         ``self.OPTION_FROM`` and ``self.OPTION_TO``
     """
     super().__init__(template, options)
     self.rdflib_convert = RdfLibConvert(config=Context.get().app.cfg)

     source_format = FileFormats.get(options[self.OPTION_FROM])
     self.input_format = source_format
     target_format = FileFormats.get(options[self.OPTION_TO])
     self.output_format = target_format

     # Unsupported formats are reported through raise_exc
     if source_format not in self.INPUT_FORMATS:
         self.raise_exc(f'Unknown input format "{source_format.name}"')
     if target_format not in self.OUTPUT_FORMATS:
         self.raise_exc(
             f'Unknown output format "{target_format.name}"')
Esempio n. 8
0
 def _run(self):
     """Run the job steps inside the configured time limit.

     The document is fetched first. Template preparation, document
     building and storing then run inside ``timeout(...)``. A
     ``TimeoutError`` raised there is handed over to
     ``LimitsEnforcer.timeout_exceeded``. ``finalize`` is called last.
     """
     self.get_document()
     try:
         time_limit = Context.get().app.cfg.experimental.job_timeout
         with timeout(time_limit):
             # Order matters: template -> build -> store
             self.prepare_template()
             self.build_document()
             self.store_document()
     except TimeoutError:
         LimitsEnforcer.timeout_exceeded(job_id=self.doc_uuid)
     self.finalize()
Esempio n. 9
0
 def __init__(self, app_uuid: str, template_dir: pathlib.Path,
              db_template: TemplateComposite):
     """Create the in-memory representation of a template.

     :param app_uuid: application the template belongs to
     :param template_dir: local directory of the template
     :param db_template: composite holding the database records
     """
     self.app_uuid = app_uuid
     self.template_dir = template_dir
     self.last_used = datetime.datetime.utcnow()
     self.db_template = db_template
     self.template_id = db_template.template.id
     self.formats: dict[str, Format] = {}

     # Assets live under "templates/<id>", prefixed by the application
     # UUID when the multi-tenant cloud setting is enabled.
     prefix = f'templates/{db_template.template.id}'
     if Context.get().app.cfg.cloud.multi_tenant:
         prefix = f'{app_uuid}/{prefix}'
     self.asset_prefix = prefix
Esempio n. 10
0
 def _init_new_template(self, app_uuid: str, template_id: str,
                        db_template: TemplateComposite):
     """Create a ``Template``, prepare its files and register it.

     The template directory is ``<workdir>/<app_uuid>/<template_id>``
     with every ``:`` in the template ID replaced by ``_``.

     :param app_uuid: application the template belongs to
     :param template_id: identifier of the template
     :param db_template: composite holding the database records
     """
     root = Context.get().app.workdir
     dir_name = template_id.replace(':', '_')
     new_template = Template(
         app_uuid=app_uuid,
         template_dir=root / app_uuid / dir_name,
         db_template=db_template,
     )
     new_template.prepare_fs()
     self._set_template(app_uuid, template_id, new_template)
Esempio n. 11
0
 def __init__(self, db_job: DBJob):
     """Create a job from its queue record.

     :param db_job: record providing ``app_uuid``, ``document_uuid``
         and ``document_context``
     """
     # Shared context and logger
     self.ctx = Context.get()
     self.log = Context.logger

     # These start out unset
     self.template = None
     self.format = None

     # Values copied from the queue record
     self.app_uuid = db_job.app_uuid
     self.doc_uuid = db_job.document_uuid
     self.doc_context = db_job.document_context

     # Typed placeholders, initially None
     self.doc: Optional[DBDocument] = None
     self.final_file: Optional[DocumentFile] = None
     self.app_config: Optional[DBAppConfig] = None
     self.app_limits: Optional[DBAppLimits] = None
Esempio n. 12
0
 def _work(self):
     """Fetch and process a single job from the queue.

     A fresh trace ID is set, ``Database.SELECT_JOB`` is executed and,
     when exactly one row comes back, the job is run, deleted from the
     queue and the transaction is committed.

     The cursor is closed on every exit path, including the early
     return and any exception raised while the job is processed, so it
     is never leaked.

     :return: ``True`` when a job was processed, ``False`` when the
         select did not return exactly one row
     """
     Context.update_trace_id(str(uuid.uuid4()))
     ctx = Context.get()
     Context.logger.debug('Trying to fetch a new job')
     cursor = ctx.app.db.conn_query.new_cursor(use_dict=True)
     try:
         cursor.execute(Database.SELECT_JOB)
         result = cursor.fetchall()
         if len(result) != 1:
             Context.logger.debug(f'Fetched {len(result)} jobs')
             return False
         db_job = Database.get_as_job(result[0])
         Context.update_document_id(db_job.document_uuid)
         Context.logger.info(f'Fetched job #{db_job.id}')
         job = Job(db_job=db_job)
         job.run()
         Context.logger.debug('Working done, deleting job from queue')
         cursor.execute(query=Database.DELETE_JOB, vars=(db_job.id, ))
         Context.logger.info('Committing transaction')
         ctx.app.db.conn_query.connection.commit()
     finally:
         # Previously the cursor was only closed on the success path
         cursor.close()
     job.log.info('Job processing finished')
     return True
Esempio n. 13
0
 def store_document(self, app_uuid: str, file_name: str,
                    content_type: str, data: bytes):
     """Upload a document to the configured bucket.

     The object is named ``<DOCUMENTS_DIR>/<file_name>``, prefixed by
     ``<app_uuid>/`` when the multi-tenant cloud setting is enabled.

     :param app_uuid: application owning the document
     :param file_name: name of the stored object within the directory
     :param content_type: content type passed to the storage client
     :param data: raw document bytes
     """
     target = f'{DOCUMENTS_DIR}/{file_name}'
     multi_tenant = Context.get().app.cfg.cloud.multi_tenant
     if multi_tenant:
         target = f'{app_uuid}/{target}'

     # The client receives a file object, so the bytes go through a
     # temporary binary file.
     with temp_binary_file(data=data) as stream:
         self.client.put_object(
             bucket_name=self.cfg.bucket,
             object_name=target,
             data=stream,
             length=len(data),
             content_type=content_type,
         )
Esempio n. 14
0
 def _add_j2_enhancements(self):
     """Register custom filters, tests and globals in the Jinja2 env.

     Filters and tests are always added. Globals are added only when a
     configuration exists for this template: ``secrets`` always, and
     ``requests`` when requests are enabled in that configuration.
     """
     from document_worker.templates.filters import filters
     from document_worker.templates.tests import tests
     from document_worker.model.http import RequestsWrapper

     self.j2_env.filters.update(filters)
     self.j2_env.tests.update(tests)

     templates_cfg = Context.get().app.cfg.templates
     template_cfg = templates_cfg.get_config(self.template.template_id)
     if template_cfg is None:
         return

     extra_globals = {'secrets': template_cfg.secrets}
     if template_cfg.requests.enabled:
         extra_globals['requests'] = RequestsWrapper(
             template_cfg=template_cfg)
     self.j2_env.globals.update(extra_globals)
Esempio n. 15
0
    def run(self):
        """Listen on the job queue and process jobs until interrupted.

        After issuing ``Database.LISTEN`` the method loops: it calls
        ``self._work()`` until that returns a falsy value, then waits
        with ``select`` (up to the configured timeout) for activity on
        the queue connection and collects the pending notifications.
        The loop ends once the module-level ``INTERRUPTED`` flag is
        truthy.
        """
        ctx = Context.get()
        Context.logger.info('Preparing to listen for document jobs')
        queue_conn = ctx.app.db.conn_queue
        with queue_conn.new_cursor() as cursor:
            cursor.execute(Database.LISTEN)
            queue_conn.listening = True
            Context.logger.info('Listening on document job queue')

            notifications = []
            # 'queue_timout' (sic) is the attribute name of the config
            timeout = ctx.app.cfg.db.queue_timout

            Context.logger.info(
                'Entering working cycle, waiting for notifications')
            while True:
                # Drain the queue before waiting for new notifications
                while self._work():
                    pass

                Context.logger.debug('Waiting for new notifications')
                notifications.clear()
                if not queue_conn.listening:
                    # The flag was reset elsewhere; subscribe again
                    cursor.execute(Database.LISTEN)
                    queue_conn.listening = True

                w = select.select([queue_conn.connection], [], [], timeout)
                if w == ([], [], []):
                    Context.logger.debug(
                        f'Nothing received in this cycle '
                        f'(timeouted after {timeout} seconds).')
                else:
                    queue_conn.connection.poll()
                    # Keep arrival order (pop() from the end reversed it)
                    pending = queue_conn.connection.notifies
                    notifications.extend(pending)
                    pending.clear()
                    Context.logger.info(
                        f'Notifications received ({len(notifications)})')
                    Context.logger.debug(f'Notifications: {notifications}')

                if INTERRUPTED:
                    Context.logger.debug(
                        'Interrupt signal received, ending...')
                    break
Esempio n. 16
0
 def name_document(cls, document_metadata: DBDocument,
                   document_file: DocumentFile) -> str:
     """Build the file name of a document using the configured strategy.

     The strategy is looked up in ``cls._STRATEGIES`` by
     ``doc.naming_strategy``; ``cls._FALLBACK`` is used for unknown keys.

     :param document_metadata: record passed to the naming strategy
     :param document_file: file whose ``filename`` method builds the
         final name from the strategy result
     :return: result of ``document_file.filename(...)``
     """
     strategy_key = Context.get().app.cfg.doc.naming_strategy
     make_name = cls._STRATEGIES.get(strategy_key, cls._FALLBACK)
     base_name = make_name(document_metadata)
     return document_file.filename(base_name)
Esempio n. 17
0
 def make_watermark(doc_pdf: bytes,
                    app_config: Optional[DBAppConfig]) -> bytes:
     """Add a watermark to the PDF when it is configured and enabled.

     A watermark is applied only when ``experimental.pdf_watermark`` is
     set, an ``app_config`` is given and its ``feature_pdf_watermark``
     flag is truthy; otherwise the input is returned unchanged.

     :param doc_pdf: PDF document bytes
     :param app_config: optional per-application configuration
     :return: watermarked PDF, or ``doc_pdf`` itself
     """
     configured = Context.get().app.cfg.experimental.pdf_watermark
     wanted = (
         configured is not None
         and app_config is not None
         and app_config.feature_pdf_watermark
     )
     if wanted:
         return PdfWaterMarker.create_watermark(doc_pdf=doc_pdf)
     return doc_pdf
Esempio n. 18
0
 def __init__(self, template, options: dict):
     """Initialize the step and create its ``WkHtmlToPdf`` wrapper.

     :param template: forwarded unchanged to the parent initializer
     :param options: forwarded unchanged to the parent initializer
     """
     super().__init__(template, options)
     app_cfg = Context.get().app.cfg
     self.wkhtmltopdf = WkHtmlToPdf(config=app_cfg)