Exemple #1
0
def _add_change(session, instance, entity, id=None):
    """Add a change record describing ``instance`` to ``session.changes``.

    The record holds the current timestamp, the ``entity`` label, an
    identifier (the given ``id`` when it is truthy, otherwise
    ``instance.id()``), the gzip-compressed ``instance.data_bytes()`` and
    ``instance.data_type``.
    """
    register_change = session.changes.add

    change = {"timestamp": utcnow(), "entity": entity}
    # A falsy ``id`` (None, "", 0) falls back to the instance's own id.
    change["id"] = id if id else instance.id()
    change["content_gz"] = gzip.compress(instance.data_bytes())
    change["content_type"] = instance.data_type

    register_change(change)
def register_document(folder: str, session_db, storage) -> tuple:
    """Register the SPS package found in ``folder``.

    The single XML file of the package is loaded and registered in
    ``storage``; its assets go through ``put_static_assets_into_storage``,
    the additional files are registered one by one and the ``.pdf``/``.html``
    entries are handed to ``get_document_renditions``. A document manifest is
    then built and added to ``session_db.documents``, followed by a
    "Document" entry in ``session_db.changes``.

    Args:
        folder: directory holding the files of one SPS package.
        session_db: session exposing the ``documents`` and ``changes`` stores.
        storage: object providing ``register(path, prefix)``; the value it
            returns for the XML is used as the XML URL in the manifest.

    Returns:
        A ``(obj_xml, document_id)`` tuple: the loaded XML object and the id
        reported by the manifest.

    Raises:
        exceptions.XMLError: when the package has more than one XML file, no
            XML file at all, or when the loaded XML object evaluates as false.
    """
    logger.info("Processando a Pasta %s", folder)
    list_files = files.list_files(folder)
    xml_files = files.xml_files_list(folder)
    _renditions = [
        file for file in list_files if ".pdf" in file or ".html" in file
    ]

    if len(xml_files) > 1:
        # Exceptions do not interpolate extra arguments the way logging
        # does, so the message has to be formatted before raising.
        raise exceptions.XMLError(
            "Existe %s xmls no pacote SPS" % len(xml_files))
    try:
        x_file = xml_files[0]
    except IndexError as ex:
        raise exceptions.XMLError(
            "Não existe XML no pacote SPS: %s" % ex) from ex

    xml_path = os.path.join(folder, x_file)
    obj_xml = xml.loadToXML(xml_path)

    xml_sps = SPS_Package(obj_xml)

    # TODO: some articles may not have the self.acron
    prefix = xml_sps.media_prefix
    url_xml = storage.register(xml_path, prefix)

    static_assets, static_additionals = get_document_assets_path(
        obj_xml, list_files, folder)
    registered_assets = put_static_assets_into_storage(static_assets, prefix,
                                                       storage)

    for additional_path in static_additionals.values():
        storage.register(os.path.join(additional_path), prefix)

    if not obj_xml:
        # Without this guard the return statement below would fail with an
        # UnboundLocalError, because no manifest is ever built.
        raise exceptions.XMLError(
            "Não foi possível processar o XML do pacote SPS: %s" % xml_path)

    renditions = get_document_renditions(folder, _renditions, prefix, storage)
    manifest_data = ManifestDomainAdapter(
        manifest=manifest.get_document_manifest(
            obj_xml, url_xml, registered_assets, renditions))

    try:
        session_db.documents.add(data=manifest_data)
        session_db.changes.add({
            "timestamp": utcnow(),
            "entity": "Document",
            "id": manifest_data.id()
        })
        logger.info("Document-store save: %s", manifest_data.id())
    except AlreadyExists as exc:
        # An already registered document is logged and does not stop the
        # function from returning its id.
        logger.exception(exc)

    return obj_xml, manifest_data.id()
def create_aop_bundle(session_db, issn):
    """Create the ahead-of-print bundle for the journal ``issn``.

    The journal is fetched, a bundle manifest is added to
    ``session_db.documents_bundles``, the journal's
    ``ahead_of_print_bundle`` is pointed at the new bundle id and the journal
    is updated. Each write is followed by an entry in ``session_db.changes``.

    Returns the bundle as fetched back from ``session_db.documents_bundles``.
    """

    def _log_change(entity, entity_id):
        # One change record per write, stamped at the moment it is logged.
        session_db.changes.add({
            "timestamp": utcnow(),
            "entity": entity,
            "id": entity_id
        })

    aop_journal = session_db.journals.fetch(issn)
    aop_bundle_id = scielo_ids_generator.aops_bundle_id(issn)

    bundle_manifest = manifest.get_document_bundle_manifest(
        aop_bundle_id, utcnow())
    session_db.documents_bundles.add(
        data=ManifestDomainAdapter(manifest=bundle_manifest))
    _log_change("DocumentsBundle", aop_bundle_id)

    aop_journal.ahead_of_print_bundle = aop_bundle_id
    session_db.journals.update(aop_journal)
    _log_change("Journal", issn)

    return session_db.documents_bundles.fetch(aop_bundle_id)
Exemple #4
0
def create_aop_bundle(session_db, issn):
    """Create the ahead-of-print bundle for the journal ``issn``.

    Fetches the journal, builds a ``DocumentsBundle`` from a fresh bundle
    manifest, stores it with ``add_bundle``, sets the journal's
    ``ahead_of_print_bundle`` to the bundle id and saves the journal with
    ``update_journal``.

    Returns the bundle as fetched back from ``session_db.documents_bundles``.
    """
    aop_journal = session_db.journals.fetch(issn)
    aop_bundle_id = scielo_ids_generator.aops_bundle_id(issn)

    bundle_manifest = manifest.get_document_bundle_manifest(
        aop_bundle_id, utcnow())
    aop_bundle = DocumentsBundle(manifest=bundle_manifest)
    add_bundle(session_db, aop_bundle)

    aop_journal.ahead_of_print_bundle = aop_bundle.id()
    update_journal(session_db, aop_journal)

    stored_bundle = session_db.documents_bundles.fetch(aop_bundle.id())
    return stored_bundle
def import_documents_bundles_link_with_journal(file_path: str,
                                               session: Session) -> None:
    """Read the journal/bundle link file and attach the bundles to journals.

    Every key of the link file is a journal id and every value is the list of
    bundles that is passed, item by item, to the journal's ``add_issue``.
    After its bundles are processed the journal is updated in
    ``session.journals`` and a "Journal" entry is added to
    ``session.changes``.

    The expected format of the link file is:
    ```
    {
        "journal_id": [
            {
                "id": "issue-1",
                "order": "0001",
                "number": "01",
                "volume": "02",
                "year": "2019",
                "supplement": "supplement"
            },
            {
                "id": "issue-2",
                "order": "0002",
                "number": "02",
                "volume": "02",
                "year": "2019",
                "supplement": "supplement"
            }
        ]
    }
    ```

    Args:
        file_path: path of the JSON link file.
        session: session exposing the ``journals`` and ``changes`` stores.

    A bundle that is already linked (``AlreadyExists``) and a journal that
    cannot be found (``DoesNotExist``) are only reported at debug level.
    """

    links = reading.read_json_file(file_path)

    for journal_id, bundles in links.items():
        try:
            _journal = session.journals.fetch(journal_id)

            # Each item is the whole bundle mapping, not just its id.
            for bundle in bundles:
                try:
                    _journal.add_issue(bundle)
                except AlreadyExists:
                    logger.debug("Bundle %s already exists in journal %s",
                                 bundle["id"], journal_id)

            session.journals.update(_journal)
            session.changes.add({
                "timestamp": utcnow(),
                "entity": "Journal",
                "id": _journal.id()
            })
        except DoesNotExist:
            logger.debug("Journal %s does not exists, cannot link bundles.",
                         journal_id)
def register_document(folder: str, session_db, storage) -> tuple:
    """Register the SPS package found in ``folder``.

    The single XML file of the package is loaded and registered in
    ``storage``; every other listed file is registered as an asset. A
    document manifest is then built and added to ``session_db.documents``,
    followed by a "Document" entry in ``session_db.changes``.

    Args:
        folder: directory holding the files of one SPS package.
        session_db: session exposing the ``documents`` and ``changes`` stores.
        storage: object providing ``register(path, prefix)``; the values it
            returns are used as the XML and asset URLs in the manifest.

    Returns:
        A ``(obj_xml, document_id)`` tuple: the loaded XML object and the id
        reported by the manifest.

    Raises:
        exceptions.XMLError: when the package has more than one XML file, no
            XML file at all, or when the loaded XML object evaluates as false.
    """
    logger.info("Processando a Pasta %s", folder)
    list_files = files.list_files(folder)
    xml_files = files.xml_files_list(folder)
    # Everything that is not an XML file is treated as a media asset.
    medias_files = set(list_files) - set(xml_files)

    if len(xml_files) > 1:
        # Exceptions do not interpolate extra arguments the way logging
        # does, so the message has to be formatted before raising.
        raise exceptions.XMLError(
            "Existe %s xmls no pacote SPS" % len(xml_files))
    try:
        x_file = xml_files[0]
    except IndexError as ex:
        raise exceptions.XMLError(
            "Não existe XML no pacote SPS: %s" % ex) from ex

    xml_path = os.path.join(folder, x_file)
    obj_xml = xml.loadToXML(xml_path)

    xml_sps = SPS_Package(obj_xml)

    prefix = xml_sps.media_prefix
    url_xml = storage.register(xml_path, prefix)

    assets = [
        {
            "asset_id": m_file,
            "asset_url": storage.register(os.path.join(folder, m_file),
                                          prefix),
        }
        for m_file in medias_files
    ]

    if not obj_xml:
        # Without this guard the return statement below would fail with an
        # UnboundLocalError, because no manifest is ever built.
        raise exceptions.XMLError(
            "Não foi possível processar o XML do pacote SPS: %s" % xml_path)

    manifest_data = ManifestDomainAdapter(
        manifest=manifest.get_document_manifest(obj_xml, url_xml, assets))

    try:
        session_db.documents.add(data=manifest_data)
        session_db.changes.add({
            "timestamp": utcnow(),
            "entity": "Document",
            "id": manifest_data.id()
        })
        logger.info("Document-store save: %s", manifest_data.id())
    except AlreadyExists as exc:
        # An already registered document is logged and does not stop the
        # function from returning its id.
        logger.exception(exc)

    return obj_xml, manifest_data.id()
def link_documents_bundles_with_documents(documents_bundle: DocumentsBundle,
                                          documents: List[str],
                                          session: Session) -> None:
    """Update the documents bundle / documents relationship in the database.

    Every item of ``documents`` is added to ``documents_bundle``; items the
    bundle already holds (``AlreadyExists``) are only logged. The bundle is
    then updated in ``session.documents_bundles`` and a "DocumentsBundle"
    entry is added to ``session.changes``.

    Args:
        documents_bundle: bundle that receives the documents.
        documents: documents to add to the bundle.
        session: session exposing the ``documents_bundles`` and ``changes``
            stores.
    """

    for document in documents:
        try:
            documents_bundle.add_document(document)
        except AlreadyExists:
            # Arguments are passed to the logger so that formatting only
            # happens when the record is actually emitted.
            logger.info("Document %s already exists in documents bundle %s",
                        document, documents_bundle)

    session.documents_bundles.update(documents_bundle)

    session.changes.add({
        "timestamp": utcnow(),
        "entity": "DocumentsBundle",
        "id": documents_bundle.id(),
    })
def import_issues(json_file: str, session: Session):
    """Load issues from a JSON file into the Kernel database.

    The JSON content is converted to xylose issues, filtered with
    ``filter_issues`` and converted to the Kernel format. Each resulting
    issue is added to ``session.documents_bundles`` together with a
    "DocumentsBundle" entry in ``session.changes``; issues that raise
    ``AlreadyExists`` are only logged.
    """

    raw_issues = reading.read_json_file(json_file)
    xylose_issues = conversion.conversion_issues_to_xylose(raw_issues)
    xylose_issues = filter_issues(xylose_issues)
    kernel_issues = conversion.conversion_issues_to_kernel(xylose_issues)

    for kernel_issue in kernel_issues:
        bundle_manifest = ManifestDomainAdapter(manifest=kernel_issue)

        try:
            session.documents_bundles.add(bundle_manifest)
            session.changes.add(
                {
                    "timestamp": utcnow(),
                    "entity": "DocumentsBundle",
                    "id": bundle_manifest.id(),
                }
            )
        except AlreadyExists as already_there:
            logger.info(str(already_there))
Exemple #9
0
def import_journals(json_file: str, session: Session):
    """Load journals from a JSON file into the Kernel database.

    The JSON content is converted to the Kernel format and each journal is
    added to ``session.journals`` together with a "Journal" entry in
    ``session.changes``. Journals that raise ``AlreadyExists`` are logged at
    info level; a ``FileNotFoundError`` or ``ValueError`` raised anywhere in
    the process is logged at debug level and ends the import.
    """

    try:
        raw_journals = reading.read_json_file(json_file)
        kernel_journals = conversion.conversion_journals_to_kernel(
            journals=raw_journals
        )

        for kernel_journal in kernel_journals:
            journal_manifest = ManifestDomainAdapter(manifest=kernel_journal)

            try:
                session.journals.add(data=journal_manifest)
                session.changes.add({
                    "timestamp": utcnow(),
                    "entity": "Journal",
                    "id": journal_manifest.id(),
                })
            except AlreadyExists as already_there:
                logger.info(str(already_there))
    except (FileNotFoundError, ValueError) as load_error:
        logger.debug(str(load_error))