def register_document(folder: str, session_db, storage) -> tuple:
    """Register the SPS package found in ``folder`` and its files in storage.

    The package must hold exactly one XML file. The XML, the static assets,
    the additional files reported by ``get_document_assets_path`` and the
    renditions are sent to ``storage``. A manifest is then built and added
    to ``session_db.documents``, followed by a matching entry in
    ``session_db.changes``.

    Args:
        folder: Directory that holds the SPS package files.
        session_db: Session object exposing ``documents.add`` and
            ``changes.add``.
        storage: Object storage exposing ``register(path, prefix)``.

    Returns:
        A ``(obj_xml, document_id)`` tuple: the object returned by
        ``xml.loadToXML`` and the value of ``manifest_data.id()``.

    Raises:
        exceptions.XMLError: If the package has no XML file or more than one.
    """
    logger.info("Processando a Pasta %s", folder)
    list_files = files.list_files(folder)
    xml_files = files.xml_files_list(folder)

    # Rendition candidates: every package file whose name contains ".pdf"
    # or ".html" (substring match, kept as in the original implementation).
    _renditions = [
        name for name in list_files if ".pdf" in name or ".html" in name
    ]

    if len(xml_files) > 1:
        # Exceptions do not apply %-formatting to their arguments the way
        # logging calls do, so the message is interpolated explicitly.
        raise exceptions.XMLError(
            "Existe %s xmls no pacote SPS" % len(xml_files)
        )
    try:
        x_file = xml_files[0]
    except IndexError as ex:
        raise exceptions.XMLError(
            "Não existe XML no pacote SPS: %s" % ex
        ) from ex

    xml_path = os.path.join(folder, x_file)
    obj_xml = xml.loadToXML(xml_path)
    xml_sps = SPS_Package(obj_xml)

    # TODO: some articles may not have self.acron
    prefix = xml_sps.media_prefix
    url_xml = storage.register(xml_path, prefix)

    static_assets, static_additionals = get_document_assets_path(
        obj_xml, list_files, folder
    )
    registered_assets = put_static_assets_into_storage(
        static_assets, prefix, storage
    )

    for additional_path in static_additionals.values():
        storage.register(os.path.join(additional_path), prefix)

    # No truthiness guard on obj_xml here: it has already been used
    # unconditionally above, and skipping this section would only leave
    # manifest_data unbound for the final return.
    renditions = get_document_renditions(folder, _renditions, prefix, storage)
    manifest_data = ManifestDomainAdapter(
        manifest=manifest.get_document_manifest(
            obj_xml, url_xml, registered_assets, renditions
        )
    )

    try:
        session_db.documents.add(data=manifest_data)
        session_db.changes.add(
            {
                "timestamp": utcnow(),
                "entity": "Document",
                "id": manifest_data.id(),
            }
        )
        logger.info("Document-store save: %s", manifest_data.id())
    except AlreadyExists as exc:
        # An already registered document is logged, not treated as fatal.
        logger.exception(exc)

    return obj_xml, manifest_data.id()
def register_document(folder: str, session_db, storage) -> tuple:
    """Register the SPS package found in ``folder`` and its files in storage.

    The package must hold exactly one XML file. The XML and every other file
    of the package are sent to ``storage``. A manifest is then built and
    added to ``session_db.documents``, followed by a matching entry in
    ``session_db.changes``.

    Args:
        folder: Directory that holds the SPS package files.
        session_db: Session object exposing ``documents.add`` and
            ``changes.add``.
        storage: Object storage exposing ``register(path, prefix)``.

    Returns:
        A ``(obj_xml, document_id)`` tuple: the object returned by
        ``xml.loadToXML`` and the value of ``manifest_data.id()``.

    Raises:
        exceptions.XMLError: If the package has no XML file or more than one.
    """
    logger.info("Processando a Pasta %s", folder)
    list_files = files.list_files(folder)
    xml_files = files.xml_files_list(folder)

    # Every non-XML file is an asset. Sorting makes the registration and
    # manifest order reproducible instead of depending on set iteration.
    medias_files = sorted(set(list_files) - set(xml_files))

    if len(xml_files) > 1:
        # Exceptions do not apply %-formatting to their arguments the way
        # logging calls do, so the message is interpolated explicitly.
        raise exceptions.XMLError(
            "Existe %s xmls no pacote SPS" % len(xml_files)
        )
    try:
        x_file = xml_files[0]
    except IndexError as ex:
        raise exceptions.XMLError(
            "Não existe XML no pacote SPS: %s" % ex
        ) from ex

    xml_path = os.path.join(folder, x_file)
    obj_xml = xml.loadToXML(xml_path)
    xml_sps = SPS_Package(obj_xml)

    prefix = xml_sps.media_prefix
    url_xml = storage.register(xml_path, prefix)

    assets = [
        {
            "asset_id": m_file,
            "asset_url": storage.register(
                os.path.join(folder, m_file), prefix
            ),
        }
        for m_file in medias_files
    ]

    # No truthiness guard on obj_xml here: it has already been used
    # unconditionally above, and skipping this section would only leave
    # manifest_data unbound for the final return.
    manifest_data = ManifestDomainAdapter(
        manifest=manifest.get_document_manifest(obj_xml, url_xml, assets)
    )

    try:
        session_db.documents.add(data=manifest_data)
        session_db.changes.add(
            {
                "timestamp": utcnow(),
                "entity": "Document",
                "id": manifest_data.id(),
            }
        )
        logger.info("Document-store save: %s", manifest_data.id())
    except AlreadyExists as exc:
        # An already registered document is logged, not treated as fatal.
        logger.exception(exc)

    return obj_xml, manifest_data.id()
def media_prefix(self):
    """Return the media prefix of the document as ``<issn>/<scielo_id>``.

    Raises:
        exceptions.XMLError: If ``self.scielo_id`` is empty or otherwise
            falsy.
    """
    if not self.scielo_id:
        # Exceptions do not apply %-formatting to their arguments, so the
        # message is interpolated before the exception is built.
        raise exceptions.XMLError(
            "Não existe Scielo-Id no XML: %s" % repr(self)
        )
    return f"{self.issn}/{self.scielo_id}"
def register_document(folder: str, session, storage, pid_database_engine, poison_pill=PoisonPill()):
    """Register an SPS package in a Kernel instance and its digital assets
    in an object storage.

    Args:
        folder: Directory that holds the SPS package files.
        session: Kernel session; ``session.documents.fetch`` is used to check
            whether the document is already registered.
        storage: Object storage exposing ``register(path, prefix)``.
        pid_database_engine: Forwarded to
            ``constructor.article_xml_constructor``.
        poison_pill: Object whose truthy ``poisoned`` attribute makes the
            function return immediately. The default instance is created
            once, at definition time, and shared by every call that omits
            this argument.

    Returns:
        ``None`` when the poison pill is set; otherwise the value of
        ``get_article_result_dict(xml_sps)``, both for a document that was
        already in the Kernel and for one handled by this call.

    Raises:
        exceptions.XMLError: If the package has no XML file or the XML
            cannot be parsed.
    """
    if poison_pill.poisoned:
        return None

    logger.debug("Starting the import step for '%s' package.", folder)

    package_files = files.list_files(folder)
    xmls = files.xml_files_list(folder)

    if not xmls:
        raise exceptions.XMLError(
            "There is no XML file into package '%s'. Please verify and try later."
            % folder
        ) from None

    # Only the first XML listed for the package is processed.
    xml_path = os.path.join(folder, xmls[0])
    constructor.article_xml_constructor(xml_path, folder, pid_database_engine, False)

    try:
        obj_xml = xml.loadToXML(xml_path)
    except lxml.etree.ParseError:
        # ``from None`` hides the parser traceback behind the domain error.
        raise exceptions.XMLError(
            "Could not parse the '%s' file, please validate"
            " this file before then try to import again." % xml_path,
        ) from None

    xml_sps = SPS_Package(obj_xml)
    pid_v3 = xml_sps.scielo_pid_v3

    try:
        session.documents.fetch(id=pid_v3)
    except DoesNotExist:
        # Not registered yet: carry on with the import below.
        pass
    else:
        logger.debug(
            "Document '%s' already exist in kernel. Returning article result information",
            pid_v3,
        )
        return get_article_result_dict(xml_sps)

    prefix = xml_sps.media_prefix or ""
    url_xml = storage.register(xml_path, prefix)
    static_assets, static_additionals = get_document_assets_path(
        obj_xml, package_files, folder
    )
    registered_assets = put_static_assets_into_storage(static_assets, prefix, storage)

    for additional_path in static_additionals.values():
        storage.register(os.path.join(additional_path), prefix)

    renditions = get_document_renditions(folder, prefix, storage)
    document = Document(
        manifest=manifest.get_document_manifest(
            xml_sps, url_xml, registered_assets, renditions
        )
    )

    try:
        add_document(session, document)
        if renditions:
            add_renditions(session, document)
    except AlreadyExists as exc:
        # A duplicate is logged and the article result is still returned.
        logger.error(exc)
    else:
        logger.debug("Document with id '%s' was imported.", document.id())

    return get_article_result_dict(xml_sps)
def media_prefix(self):
    """Build the media prefix of the document as ``<issn>/<scielo_pid_v3>``.

    Raises:
        exceptions.XMLError: If ``self.scielo_pid_v3`` is empty or otherwise
            falsy.
    """
    if self.scielo_pid_v3:
        return "{}/{}".format(self.issn, self.scielo_pid_v3)
    raise exceptions.XMLError("Não existe scielo-pid-v3")