def _add_change(session, instance, entity, id=None):
    session.changes.add(
        {
            "timestamp": utcnow(),
            "entity": entity,
            "id": id or instance.id(),
            "content_gz": gzip.compress(instance.data_bytes()),
            "content_type": instance.data_type,
        }
    )
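# Illustrative only: a minimal sketch of how _add_change is typically used right
# after persisting a domain instance, assuming ``document`` exposes the id(),
# data_bytes() and data_type members required above. Not part of the module API.
def _example_track_document_change(session, document):
    session.documents.add(data=document)
    _add_change(session, document, "Document")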
def register_document(folder: str, session_db, storage):
    logger.info("Processing folder %s", folder)
    list_files = files.list_files(folder)

    obj_xml = None
    prefix = ""
    xml_files = files.xml_files_list(folder)
    _renditions = list(
        filter(lambda file: ".pdf" in file or ".html" in file, list_files)
    )

    if len(xml_files) > 1:
        raise exceptions.XMLError(
            "There are %s XML files in the SPS package" % len(xml_files)
        )

    try:
        x_file = xml_files[0]
    except IndexError as ex:
        raise exceptions.XMLError("There is no XML file in the SPS package: %s" % ex)

    xml_path = os.path.join(folder, x_file)
    obj_xml = xml.loadToXML(xml_path)

    xml_sps = SPS_Package(obj_xml)

    # TODO: some articles may not have self.acron
    prefix = xml_sps.media_prefix
    url_xml = storage.register(xml_path, prefix)

    static_assets, static_additionals = get_document_assets_path(
        obj_xml, list_files, folder
    )
    registered_assets = put_static_assets_into_storage(static_assets, prefix, storage)

    for additional_path in static_additionals.values():
        storage.register(additional_path, prefix)

    if obj_xml:
        renditions = get_document_renditions(folder, _renditions, prefix, storage)
        manifest_data = ManifestDomainAdapter(
            manifest=manifest.get_document_manifest(
                obj_xml, url_xml, registered_assets, renditions
            )
        )

        try:
            session_db.documents.add(data=manifest_data)
            session_db.changes.add(
                {"timestamp": utcnow(), "entity": "Document", "id": manifest_data.id()}
            )
            logger.info("Document-store save: %s", manifest_data.id())
        except AlreadyExists as exc:
            logger.exception(exc)

    return obj_xml, manifest_data.id()
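# Illustrative only: a minimal sketch of driving register_document over a tree of
# extracted SPS packages. ``packages_root`` is a hypothetical directory containing
# one sub-folder per package; session_db and storage are supplied by the caller.
def _example_register_packages(packages_root, session_db, storage):
    for name in sorted(os.listdir(packages_root)):
        folder = os.path.join(packages_root, name)
        if not os.path.isdir(folder):
            continue
        try:
            register_document(folder, session_db, storage)
        except exceptions.XMLError as exc:
            logger.error("Package %s skipped: %s", folder, exc)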
def create_aop_bundle(session_db, issn):
    _journal = session_db.journals.fetch(issn)
    bundle_id = scielo_ids_generator.aops_bundle_id(issn)
    manifest_data = ManifestDomainAdapter(
        manifest=manifest.get_document_bundle_manifest(bundle_id, utcnow())
    )
    session_db.documents_bundles.add(data=manifest_data)
    session_db.changes.add(
        {"timestamp": utcnow(), "entity": "DocumentsBundle", "id": bundle_id}
    )
    _journal.ahead_of_print_bundle = bundle_id
    session_db.journals.update(_journal)
    session_db.changes.add({"timestamp": utcnow(), "entity": "Journal", "id": issn})
    return session_db.documents_bundles.fetch(bundle_id)
def create_aop_bundle(session_db, issn):
    journal = session_db.journals.fetch(issn)
    bundle_id = scielo_ids_generator.aops_bundle_id(issn)
    bundle = DocumentsBundle(
        manifest=manifest.get_document_bundle_manifest(bundle_id, utcnow())
    )
    add_bundle(session_db, bundle)
    journal.ahead_of_print_bundle = bundle.id()
    update_journal(session_db, journal)
    return session_db.documents_bundles.fetch(bundle.id())
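# Illustrative only: a minimal fetch-or-create sketch built on create_aop_bundle,
# assuming the session raises DoesNotExist when the ahead-of-print bundle is not
# registered yet (the same exception handled by the facades below).
def _example_get_or_create_aop_bundle(session_db, issn):
    bundle_id = scielo_ids_generator.aops_bundle_id(issn)
    try:
        return session_db.documents_bundles.fetch(bundle_id)
    except DoesNotExist:
        return create_aop_bundle(session_db, issn)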
def import_documents_bundles_link_with_journal(file_path: str, session: Session):
    """Facade responsible for reading the link file between journals and
    documents bundles and updating the journals with the bundle identifiers.

    The expected format for the link file is:
    ```
    {
        "journal_id": [
            {
                "id": "issue-2",
                "order": "0002",
                "number": "02",
                "volume": "02",
                "year": "2019",
                "supplement": "supplement",
            },
            {
                "id": "issue-2",
                "order": "0002",
                "number": "02",
                "volume": "02",
                "year": "2019",
                "supplement": "supplement",
            },
        ]
    }
    ```
    """
    links = reading.read_json_file(file_path)

    for journal_id, bundles in links.items():
        try:
            _journal = session.journals.fetch(journal_id)
            for bundle_id in bundles:
                try:
                    _journal.add_issue(bundle_id)
                except AlreadyExists:
                    logger.debug(
                        "Bundle %s already exists in journal %s",
                        bundle_id["id"],
                        journal_id,
                    )
            session.journals.update(_journal)
            session.changes.add(
                {"timestamp": utcnow(), "entity": "Journal", "id": _journal.id()}
            )
        except DoesNotExist:
            logger.debug(
                "Journal %s does not exist, cannot link bundles.", journal_id
            )
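# Illustrative only: a minimal sketch that writes a link file in the format
# documented above so the facade can be exercised end to end. The journal id and
# bundle metadata are hypothetical values.
def _example_write_link_file(file_path):
    import json  # assumed available; not necessarily imported at module level

    links = {
        "0001-3714": [
            {
                "id": "0001-3714-2019-v53-n1",
                "order": "0001",
                "number": "01",
                "volume": "53",
                "year": "2019",
            }
        ]
    }
    with open(file_path, "w") as file_:
        json.dump(links, file_)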
def register_document(folder: str, session_db, storage):
    logger.info("Processing folder %s", folder)
    list_files = files.list_files(folder)

    obj_xml = None
    prefix = ""
    xml_files = files.xml_files_list(folder)
    medias_files = set(list_files) - set(xml_files)

    if len(xml_files) > 1:
        raise exceptions.XMLError(
            "There are %s XML files in the SPS package" % len(xml_files)
        )

    try:
        x_file = xml_files[0]
    except IndexError as ex:
        raise exceptions.XMLError("There is no XML file in the SPS package: %s" % ex)

    xml_path = os.path.join(folder, x_file)
    obj_xml = xml.loadToXML(xml_path)

    xml_sps = SPS_Package(obj_xml)
    prefix = xml_sps.media_prefix
    url_xml = storage.register(xml_path, prefix)

    assets = []
    for m_file in medias_files:
        assets.append(
            {
                "asset_id": m_file,
                "asset_url": storage.register(os.path.join(folder, m_file), prefix),
            }
        )

    if obj_xml:
        manifest_data = ManifestDomainAdapter(
            manifest=manifest.get_document_manifest(obj_xml, url_xml, assets)
        )

        try:
            session_db.documents.add(data=manifest_data)
            session_db.changes.add(
                {"timestamp": utcnow(), "entity": "Document", "id": manifest_data.id()}
            )
            logger.info("Document-store save: %s", manifest_data.id())
        except AlreadyExists as exc:
            logger.exception(exc)

    return obj_xml, manifest_data.id()
def link_documents_bundles_with_documents(
    documents_bundle: DocumentsBundle, documents: List[str], session: Session
):
    """Function responsible for updating the relationship between documents
    bundles and documents at the database level."""

    for document in documents:
        try:
            documents_bundle.add_document(document)
        except AlreadyExists:
            logger.info(
                "Document %s already exists in documents bundle %s",
                document,
                documents_bundle,
            )

    session.documents_bundles.update(documents_bundle)
    session.changes.add(
        {
            "timestamp": utcnow(),
            "entity": "DocumentsBundle",
            "id": documents_bundle.id(),
        }
    )
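# Illustrative only: a minimal sketch that links a previously imported bundle to a
# list of document ids. ``bundle_id`` and ``document_ids`` are hypothetical inputs
# provided by the caller.
def _example_link_bundle(session, bundle_id, document_ids):
    bundle = session.documents_bundles.fetch(bundle_id)
    link_documents_bundles_with_documents(bundle, document_ids, session)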
def import_issues(json_file: str, session: Session):
    """Facade with the step-by-step processing and loading of issues in JSON
    format into the Kernel database."""

    issues_as_json = reading.read_json_file(json_file)
    issues_as_xylose = conversion.conversion_issues_to_xylose(issues_as_json)
    issues_as_xylose = filter_issues(issues_as_xylose)
    issues_as_kernel = conversion.conversion_issues_to_kernel(issues_as_xylose)

    for issue in issues_as_kernel:
        manifest = ManifestDomainAdapter(manifest=issue)

        try:
            session.documents_bundles.add(manifest)
            session.changes.add(
                {
                    "timestamp": utcnow(),
                    "entity": "DocumentsBundle",
                    "id": manifest.id(),
                }
            )
        except AlreadyExists as exc:
            logger.info(str(exc))
def import_journals(json_file: str, session: Session):
    """Facade with the step-by-step processing and loading of journals in JSON
    format into the Kernel database."""

    try:
        journals_as_json = reading.read_json_file(json_file)
        journals_as_kernel = conversion.conversion_journals_to_kernel(
            journals=journals_as_json
        )

        for journal in journals_as_kernel:
            manifest = ManifestDomainAdapter(manifest=journal)

            try:
                session.journals.add(data=manifest)
                session.changes.add(
                    {"timestamp": utcnow(), "entity": "Journal", "id": manifest.id()}
                )
            except AlreadyExists as exc:
                logger.info(str(exc))
    except (FileNotFoundError, ValueError) as exc:
        logger.debug(str(exc))
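# Illustrative only: a minimal sketch of a complete load, assuming journals must be
# imported before their issues and before the journal/bundle links are applied.
# The file paths are hypothetical.
def _example_full_import(session):
    import_journals("journals.json", session)
    import_issues("issues.json", session)
    import_documents_bundles_link_with_journal("links.json", session)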