def register_document(folder: str, session_db, storage) -> tuple:
    """Register the SPS package found in *folder* and the files it ships.

    The package's single XML file is loaded and sent to ``storage``, followed
    by the static assets, the additional files and the renditions. A document
    manifest is then added to ``session_db.documents`` together with a
    ``"Document"`` entry in ``session_db.changes``.

    Args:
        folder: Directory holding the SPS package files.
        session_db: Object exposing ``documents.add`` and ``changes.add``.
        storage: Object exposing ``register(path, prefix)``; the value it
            returns for the XML is handed to the manifest as the XML URL.

    Returns:
        A ``(obj_xml, document_id)`` tuple. ``document_id`` is ``None`` when
        the loaded XML object is falsy and therefore no manifest was built.

    Raises:
        exceptions.XMLError: If the package contains more than one XML file
            or none at all.
    """
    logger.info("Processando a Pasta %s", folder)
    list_files = files.list_files(folder)
    xml_files = files.xml_files_list(folder)

    # Rendition candidates: every file whose name contains ".pdf" or ".html".
    _renditions = [
        file for file in list_files if ".pdf" in file or ".html" in file
    ]

    if len(xml_files) > 1:
        # Interpolate explicitly; passing the value as a second positional
        # argument would leave the "%s" placeholder unformatted.
        raise exceptions.XMLError(
            "Existe %s xmls no pacote SPS" % len(xml_files))

    try:
        x_file = xml_files[0]
    except IndexError as ex:
        raise exceptions.XMLError(
            "Não existe XML no pacote SPS: %s" % ex) from ex

    xml_path = os.path.join(folder, x_file)
    obj_xml = xml.loadToXML(xml_path)

    # TODO: some articles may not have self.acron
    prefix = SPS_Package(obj_xml).media_prefix
    url_xml = storage.register(xml_path, prefix)

    static_assets, static_additionals = get_document_assets_path(
        obj_xml, list_files, folder)
    registered_assets = put_static_assets_into_storage(static_assets, prefix,
                                                       storage)

    for additional_path in static_additionals.values():
        storage.register(os.path.join(additional_path), prefix)

    # Stays None when no manifest is built, so the final return never reads
    # an unbound name.
    document_id = None

    if obj_xml:
        renditions = get_document_renditions(folder, _renditions, prefix,
                                             storage)
        manifest_data = ManifestDomainAdapter(
            manifest=manifest.get_document_manifest(
                obj_xml, url_xml, registered_assets, renditions))

        try:
            session_db.documents.add(data=manifest_data)
            session_db.changes.add({
                "timestamp": utcnow(),
                "entity": "Document",
                "id": manifest_data.id()
            })
            logger.info("Document-store save: %s", manifest_data.id())
        except AlreadyExists as exc:
            # A duplicate is logged rather than propagated; the id is still
            # returned to the caller.
            logger.exception(exc)

        document_id = manifest_data.id()

    return obj_xml, document_id
def register_document(folder: str, session_db, storage) -> tuple:
    """Register the SPS package found in *folder*, treating every non-XML
    file as a media asset.

    The package's single XML file is loaded and sent to ``storage``; every
    other file listed for the folder is registered as an asset. A document
    manifest is then added to ``session_db.documents`` together with a
    ``"Document"`` entry in ``session_db.changes``.

    Args:
        folder: Directory holding the SPS package files.
        session_db: Object exposing ``documents.add`` and ``changes.add``.
        storage: Object exposing ``register(path, prefix)``; the values it
            returns are stored in the manifest as the XML and asset URLs.

    Returns:
        A ``(obj_xml, document_id)`` tuple. ``document_id`` is ``None`` when
        the loaded XML object is falsy and therefore no manifest was built.

    Raises:
        exceptions.XMLError: If the package contains more than one XML file
            or none at all.
    """
    logger.info("Processando a Pasta %s", folder)
    list_files = files.list_files(folder)
    xml_files = files.xml_files_list(folder)

    # Everything that is not an XML file is a media asset. Sorting makes the
    # registration order (and the asset order in the manifest) reproducible
    # instead of following set iteration order.
    medias_files = sorted(set(list_files) - set(xml_files))

    if len(xml_files) > 1:
        # Interpolate explicitly; passing the value as a second positional
        # argument would leave the "%s" placeholder unformatted.
        raise exceptions.XMLError(
            "Existe %s xmls no pacote SPS" % len(xml_files))

    try:
        x_file = xml_files[0]
    except IndexError as ex:
        raise exceptions.XMLError(
            "Não existe XML no pacote SPS: %s" % ex) from ex

    xml_path = os.path.join(folder, x_file)
    obj_xml = xml.loadToXML(xml_path)

    prefix = SPS_Package(obj_xml).media_prefix
    url_xml = storage.register(xml_path, prefix)

    assets = [
        {
            "asset_id": m_file,
            "asset_url": storage.register(os.path.join(folder, m_file),
                                          prefix),
        }
        for m_file in medias_files
    ]

    # Stays None when no manifest is built, so the final return never reads
    # an unbound name.
    document_id = None

    if obj_xml:
        manifest_data = ManifestDomainAdapter(
            manifest=manifest.get_document_manifest(obj_xml, url_xml, assets))

        try:
            session_db.documents.add(data=manifest_data)
            session_db.changes.add({
                "timestamp": utcnow(),
                "entity": "Document",
                "id": manifest_data.id()
            })
            logger.info("Document-store save: %s", manifest_data.id())
        except AlreadyExists as exc:
            # A duplicate is logged rather than propagated; the id is still
            # returned to the caller.
            logger.exception(exc)

        document_id = manifest_data.id()

    return obj_xml, document_id
    def media_prefix(self):
        """Return the storage prefix for this package as ``<issn>/<scielo_id>``.

        Raises:
            exceptions.XMLError: If ``self.scielo_id`` is empty or missing.
        """
        if not self.scielo_id:
            # Interpolate here: a plain Exception subclass keeps extra
            # positional arguments as-is and would leave "%s" unformatted.
            raise exceptions.XMLError(
                "Não existe Scielo-Id no XML: %s" % repr(self))

        return f"{self.issn}/{self.scielo_id}"
Example #4
0
def register_document(folder: str, session, storage, pid_database_engine, poison_pill=PoisonPill()):
    """Register an SPS package in a Kernel instance and its digital assets
    in an object storage.

    Args:
        folder: Directory holding the SPS package files.
        session: Kernel session; ``session.documents.fetch`` is used to
            detect an already registered document.
        storage: Object exposing ``register(path, prefix)``.
        pid_database_engine: Forwarded untouched to
            ``constructor.article_xml_constructor``.
        poison_pill: Object whose ``poisoned`` flag aborts the import.
            NOTE(review): the default instance is created once, when the
            function is defined, and is shared by every call that omits
            this argument.

    Returns:
        ``None`` when ``poison_pill.poisoned`` is set; otherwise the value of
        ``get_article_result_dict`` for the package, both when the document
        already existed and when the import was attempted.

    Raises:
        exceptions.XMLError: If the package has no XML file or the XML file
            cannot be parsed.
    """
    if poison_pill.poisoned:
        return None

    logger.debug("Starting the import step for '%s' package.", folder)

    package_files = files.list_files(folder)
    xmls = files.xml_files_list(folder)

    if not xmls:
        raise exceptions.XMLError(
            "There is no XML file into package '%s'. Please verify and try later."
            % folder
        ) from None

    # Only the first XML listed for the package is imported.
    xml_path = os.path.join(folder, xmls[0])
    # NOTE(review): runs before the file is parsed below; presumably it
    # prepares the XML on disk -- confirm against the constructor module.
    constructor.article_xml_constructor(xml_path, folder, pid_database_engine, False)

    try:
        obj_xml = xml.loadToXML(xml_path)
    except lxml.etree.ParseError:
        # The parser traceback is deliberately suppressed with ``from None``.
        raise exceptions.XMLError(
            "Could not parse the '%s' file, please validate"
            " this file before then try to import again." % xml_path,
        ) from None

    xml_sps = SPS_Package(obj_xml)

    pid_v3 = xml_sps.scielo_pid_v3

    # Skip the whole registration when the Kernel already has this document.
    try:
        session.documents.fetch(id=pid_v3)
    except DoesNotExist:
        pass
    else:
        logger.debug(
            "Document '%s' already exist in kernel. Returning article result information",
            pid_v3,
        )
        return get_article_result_dict(xml_sps)

    prefix = xml_sps.media_prefix or ""
    url_xml = storage.register(xml_path, prefix)
    static_assets, static_additionals = get_document_assets_path(
        obj_xml, package_files, folder
    )
    registered_assets = put_static_assets_into_storage(static_assets, prefix, storage)

    for additional_path in static_additionals.values():
        storage.register(os.path.join(additional_path), prefix)

    renditions = get_document_renditions(folder, prefix, storage)
    document = Document(
        manifest=manifest.get_document_manifest(
            xml_sps, url_xml, registered_assets, renditions
        )
    )

    try:
        add_document(session, document)
        if renditions:
            add_renditions(session, document)
    except AlreadyExists as exc:
        # A duplicate is logged rather than propagated; the result dict is
        # still returned below.
        logger.error(exc)
    else:
        logger.debug("Document with id '%s' was imported.", document.id())

    return get_article_result_dict(xml_sps)
Example #5
0
    def media_prefix(self):
        """Return the storage prefix for this package as
        ``<issn>/<scielo_pid_v3>``.

        Raises:
            exceptions.XMLError: If ``self.scielo_pid_v3`` is empty or
                missing.
        """
        if self.scielo_pid_v3:
            return "{}/{}".format(self.issn, self.scielo_pid_v3)

        # No PID v3 available: the prefix cannot be built.
        raise exceptions.XMLError("Não existe scielo-pid-v3")