Esempio n. 1
0
def makeOneArticle(attrib=None):  # noqa
    """
    Build and save an ``Article`` with the mandatory attributes
    (``_id``, ``aid``, ``is_public``) filled in.

    Default values are generated for every field listed below; the
    contents of ``attrib`` are applied last, so any key present in
    ``attrib`` overrides the generated value as-is.

    ``attrib['journal']`` and ``attrib['issue']`` may each be:

    * missing/falsy: a new one is created with ``makeOneJournal`` /
      ``makeOneIssue``;
    * a ``str``: treated as the id; the object is fetched, or created with
      that id when it does not exist;
    * a ``models.Journal`` / ``models.Issue`` instance: used directly.

    Any other type raises ``ValueError``.

    Returns the result of ``models.Article(**article).save()``.
    """

    attrib = attrib or {}
    default_id = attrib.get('_id', str(uuid4().hex))
    default_pid = attrib.get('pid', '0000-00000000000000000')
    default_title = "article-%s" % default_id
    default_domain_key = "article-domain_key-%s" % default_id
    issue = attrib.get('issue', None)
    journal = attrib.get('journal', None)

    if not journal:
        journal = makeOneJournal()
    elif isinstance(journal, str):
        # ``journal`` holds the journal id
        try:
            journal = models.Journal.objects.get(_id=journal)
        except models.Journal.DoesNotExist:
            journal = makeOneJournal({'_id': journal})
    elif not isinstance(journal, models.Journal):
        raise ValueError('WTF is journal?')

    if not issue:
        issue = makeOneIssue({'journal': journal})
    elif isinstance(issue, str):
        # ``issue`` holds the issue id
        try:
            issue = models.Issue.objects.get(_id=issue, journal=journal.id)
        except models.Issue.DoesNotExist:
            issue = makeOneIssue({'_id': issue, 'journal': journal.id})
    elif not isinstance(issue, models.Issue):
        raise ValueError('WTF is issue?')

    article = {
        '_id': default_id,
        'aid': default_id,
        'title': attrib.get('title', default_title),
        'domain_key': attrib.get('domain_key', default_domain_key),
        'is_aop': attrib.get('is_aop', False),
        'is_public': attrib.get('is_public', True),
        'created': attrib.get('created', datetime.datetime.now()),
        'updated': attrib.get('updated', datetime.datetime.now()),
        'issue': issue.id,
        'journal': journal.id,
        'pid': default_pid,
        'original_language': attrib.get('original_language', 'pt'),
        'fpage': attrib.get('fpage', '15'),
        'lpage': attrib.get('lpage', '16'),
        'translated_titles': attrib.get('translated_titles', []),
        'languages': attrib.get('languages', [
            'pt',
        ]),
    }
    # Caller-supplied values win over every default computed above.
    article.update(attrib)

    return models.Article(**article).save()
def ArticleFactory(
    document_id: str,
    data: dict,
    issue_id: str,
    document_order: int,
    document_xml_url: str,
) -> models.Article:
    """Build an article instance from the input data.

    The values in ``data`` are adapted to the layout expected by the
    ``Article`` model of the OPAC Schema. When an article with
    ``document_id`` already exists it is loaded and updated in place;
    otherwise a new instance is created. ``save()`` is not called here.

    Args:
        document_id (str): Document identifier; stored as ``_id`` and ``aid``.
        data (dict): Structure holding the document ``front``.
        issue_id (str): Issue identifier. When ``None`` and the article
            already exists, the id of its current issue is used.
        document_order (int): Position of the article. A falsy value leaves
            ``article.order`` untouched.
        document_xml_url (str): URL of the article XML.

    Returns:
        models.Article: Article instance in the OPAC data model.

    Raises:
        Whatever ``models.Issue.objects.get`` raises when no issue matches
        ``issue_id``; that lookup is not guarded here.
    """
    def _nestget(data, *path, default=""):
        """Walk ``path`` through nested lists/dicts.

        Returns ``default`` as soon as a key or index is missing.
        """
        for key_or_index in path:
            try:
                data = data[key_or_index]
            except (KeyError, IndexError):
                return default
        return data

    AUTHOR_CONTRIB_TYPES = (
        "author",
        "editor",
        "organizer",
        "translator",
        "autor",
        "compiler",
    )

    try:
        article = models.Article.objects.get(_id=document_id)

        if issue_id is None:
            issue_id = article.issue._id
    except models.Article.DoesNotExist:
        article = models.Article()

    # Main data
    article.title = _nestget(data, "article_meta", 0, "article_title", 0)
    article.section = _nestget(data, "article_meta", 0, "pub_subject", 0)
    article.abstract = _nestget(data, "article_meta", 0, "abstract", 0)

    # Identifiers
    article._id = document_id
    article.aid = document_id

    # SciELO PIDs (v1..v3) found inside article_meta; absent ones are skipped
    scielo_pids = {}
    for version in range(1, 4):
        pid_value = _nestget(data,
                             "article_meta",
                             0,
                             f"scielo_pid_v{version}",
                             0,
                             default=None)
        if pid_value is not None:
            scielo_pids[f"v{version}"] = pid_value
    article.scielo_pids = scielo_pids

    article.doi = _nestget(data, "article_meta", 0, "article_doi", 0)

    def _get_article_authors(data) -> Generator:
        """Yield ``"surname, given names"`` for each author-like contrib."""

        for contrib in _nestget(data, "contrib"):
            if _nestget(contrib, "contrib_type", 0) in AUTHOR_CONTRIB_TYPES:
                yield ("%s, %s" % (
                    _nestget(contrib, "contrib_surname", 0),
                    _nestget(contrib, "contrib_given_names", 0),
                ))

    def _get_original_language(data: dict) -> str:
        """Return the language of the main article."""
        return _nestget(data, "article", 0, "lang", 0)

    def _get_languages(data: dict) -> List[str]:
        """Return the original language followed by each sub-article language."""

        languages = [_get_original_language(data)]

        for sub_article in _nestget(data, "sub_article"):
            languages.append(_nestget(sub_article, "article", 0, "lang", 0))

        return languages

    def _get_translated_titles(data: dict) -> Generator:
        """Yield one ``TranslatedTitle`` per sub-article."""

        for sub_article in _nestget(data, "sub_article"):
            yield models.TranslatedTitle(
                **{
                    "name":
                    _nestget(sub_article, "article_meta", 0, "article_title",
                             0),
                    "language":
                    _nestget(sub_article, "article", 0, "lang", 0),
                })

    def _get_translated_sections(data: dict) -> List[models.TranslatedSection]:
        """Return the section of the main article and of each sub-article."""

        sections = [
            models.TranslatedSection(
                **{
                    "name": _nestget(data, "article_meta", 0, "pub_subject",
                                     0),
                    "language": _get_original_language(data),
                })
        ]

        for sub_article in _nestget(data, "sub_article"):
            sections.append(
                models.TranslatedSection(
                    **{
                        "name":
                        _nestget(sub_article, "article_meta", 0, "pub_subject",
                                 0),
                        "language":
                        _nestget(sub_article, "article", 0, "lang", 0),
                    }))

        return sections

    def _get_abstracts(data: dict) -> List[models.Abstract]:
        """Return the main abstract, the trans-abstracts and the
        sub-article abstracts, in that order."""

        abstracts = [
            models.Abstract(
                **{
                    "text": _nestget(data, "article_meta", 0, "abstract", 0),
                    "language": _get_original_language(data),
                })
        ]

        for trans_abstract in data.get("trans_abstract", []):
            abstracts.append(
                models.Abstract(
                    **{
                        "text": _nestget(trans_abstract, "text", 0),
                        "language": _nestget(trans_abstract, "lang", 0),
                    }))

        for sub_article in _nestget(data, "sub_article"):
            abstracts.append(
                models.Abstract(
                    **{
                        "text":
                        _nestget(sub_article, "article_meta", 0, "abstract",
                                 0),
                        "language":
                        _nestget(sub_article, "article", 0, "lang", 0),
                    }))

        return abstracts

    def _get_keywords(data: dict) -> List[models.ArticleKeyword]:
        """Return the keyword groups of the article and of its sub-articles."""

        keywords = [
            models.ArticleKeyword(
                **{
                    "keywords": _nestget(kwd_group, "kwd", default=[]),
                    "language": _nestget(kwd_group, "lang", 0),
                }) for kwd_group in _nestget(data, "kwd_group", default=[])
        ]

        for sub_article in _nestget(data, "sub_article"):
            keywords.extend(
                models.ArticleKeyword(
                    **{
                        "keywords": _nestget(kwd_group, "kwd", default=[]),
                        "language": _nestget(kwd_group, "lang", 0),
                    })
                for kwd_group in _nestget(sub_article, "kwd_group", default=[]))

        return keywords

    # Computed once; reused below for the ``htmls`` compatibility field.
    languages = _get_languages(data)

    article.authors = list(_get_article_authors(data))
    article.languages = list(languages)
    article.translated_titles = list(_get_translated_titles(data))
    article.trans_sections = list(_get_translated_sections(data))
    article.abstracts = list(_get_abstracts(data))
    article.keywords = list(_get_keywords(data))

    article.abstract_languages = [
        abstract["language"] for abstract in article.abstracts
    ]

    article.original_language = _get_original_language(data)
    article.publication_date = _nestget(data, "pub_date", 0, "text", 0)

    article.type = _nestget(data, "article", 0, "type", 0)

    # Location data
    article.elocation = _nestget(data, "article_meta", 0, "pub_elocation", 0)
    article.fpage = _nestget(data, "article_meta", 0, "pub_fpage", 0)
    article.fpage_sequence = _nestget(data, "article_meta", 0, "pub_fpage_seq",
                                      0)
    article.lpage = _nestget(data, "article_meta", 0, "pub_lpage", 0)

    # This reads the issue the article had BEFORE it is re-linked below, so
    # the URL segments of an "ahead" issue are captured before they change.
    if article.issue is not None and article.issue.number == "ahead":
        if article.aop_url_segs is None:
            url_segs = {
                "url_seg_article": article.url_segment,
                "url_seg_issue": article.issue.url_segment,
            }
            article.aop_url_segs = models.AOPUrlSegments(**url_segs)

    # Linked issue
    issue = models.Issue.objects.get(_id=issue_id)
    article.issue = issue
    article.journal = issue.journal

    if document_order:
        article.order = int(document_order)

    article.xml = document_xml_url

    # OPAC compatibility field
    article.htmls = [{"lang": lang} for lang in languages]

    return article
Esempio n. 3
0
def ArticleFactory(
    document_id: str,
    data: dict,
    issue_id: str,
    document_order: int,
    document_xml_url: str,
    repeated_doc_pids=None,
    fetch_document_xml: callable = None,
) -> models.Article:
    """Build an article instance from the input data.

    The values in ``data`` are adapted to the layout expected by the
    ``Article`` model of the OPAC Schema. When an article with
    ``document_id`` already exists it is loaded and updated in place;
    otherwise a new instance is created. The processed article itself is
    not saved here, but a related article found by DOI may be saved (see
    ``_update_related_articles``).

    Args:
        document_id (str): Document identifier; stored as ``_id`` and ``aid``.
        data (dict): Structure holding the document ``front``.
        issue_id (str): Issue identifier. When ``None`` and the article
            already exists, the id of its current issue is used.
        document_order (int): Position of the article. When it cannot be
            converted to ``int``, the last 5 characters of PID v2 are used.
        document_xml_url (str): URL of the article XML.
        repeated_doc_pids (list): Extra PIDs merged with
            ``article_publisher_id`` into ``scielo_pids['other']``.
        fetch_document_xml (callable): Function used to fetch the XML from
            the Kernel when needed (corrections, retractions, addenda).
            When it is ``None`` the related-articles step is skipped and an
            error is logged.

    Returns:
        models.Article: Article instance in the OPAC data model.

    Raises:
        InvalidOrderValueError: neither ``document_order`` nor PID v2 yields
            an integer order.
        KernelFrontHasNoPubYearError: the publication dates cannot be read.
        Whatever ``models.Issue.objects.get`` raises when no issue matches
        ``issue_id``; that lookup is not guarded here.
    """
    AUTHOR_CONTRIB_TYPES = (
        "author",
        "editor",
        "organizer",
        "translator",
        "autor",
        "compiler",
    )

    try:
        article = models.Article.objects.get(_id=document_id)

        if issue_id is None:
            issue_id = article.issue._id
    except models.Article.DoesNotExist:
        article = models.Article()

    # Update status
    article.is_public = True

    # Main data
    article.title = _get_main_article_title(data)
    article.section = _nestget(data, "article_meta", 0, "pub_subject", 0)
    article.abstract = _nestget(data, "article_meta", 0, "abstract", 0)

    # Identifiers
    article._id = document_id
    article.aid = document_id

    # SciELO PIDs (v1..v3) found inside article_meta; absent ones are skipped
    scielo_pids = {}
    for version in range(1, 4):
        pid_value = _nestget(data,
                             "article_meta",
                             0,
                             f"scielo_pid_v{version}",
                             0,
                             default=None)
        if pid_value is not None:
            scielo_pids[f"v{version}"] = pid_value
    article.scielo_pids = scielo_pids

    # Other kinds of PIDs/IDs go into ``scielo_pids['other']``
    article_publisher_id = _nestget(data, "article_meta", 0,
                                    "article_publisher_id") or []
    repeated_doc_pids = repeated_doc_pids or []
    repeated_doc_pids = list(set(repeated_doc_pids + article_publisher_id))
    if repeated_doc_pids:
        article.scielo_pids.update({"other": repeated_doc_pids})

    article.aop_pid = _nestget(data, "article_meta", 0, "previous_pid", 0)
    article.pid = article.scielo_pids.get("v2")

    article.doi = _nestget(data, "article_meta", 0, "article_doi", 0)

    def _format_author_name(contrib) -> str:
        """Return ``"surname[ suffix], given names"`` for one contrib."""
        suffix = _nestget(contrib, "contrib_suffix", 0)
        return "%s%s, %s" % (
            _nestget(contrib, "contrib_surname", 0),
            " " + suffix if suffix else "",
            _nestget(contrib, "contrib_given_names", 0),
        )

    def _get_article_authors(data) -> Generator:
        """Yield the formatted name of each author-like contrib."""

        for contrib in _nestget(data, "contrib"):
            if _nestget(contrib, "contrib_type", 0) in AUTHOR_CONTRIB_TYPES:
                yield _format_author_name(contrib)

    def _get_author_affiliation(data, xref_aff_id):
        """Return ``institution_orgname`` of the aff whose id is
        ``xref_aff_id``, or ``None`` when no aff matches."""

        for aff in _nestget(data, "aff"):
            if _nestget(aff, "aff_id", 0) == xref_aff_id:
                return _nestget(aff, "institution_orgname", 0)
        return None

    def _get_article_authors_meta(data):
        """Return one ``models.AuthorMeta`` per author-like contrib, with
        the ORCID and affiliation when available."""

        authors = []

        for contrib in _nestget(data, "contrib"):
            if _nestget(contrib, "contrib_type", 0) in AUTHOR_CONTRIB_TYPES:
                author_dict = {'name': _format_author_name(contrib)}

                orcid = _nestget(contrib, "contrib_orcid", 0)
                if orcid:
                    author_dict['orcid'] = orcid

                aff = _get_author_affiliation(data,
                                              _nestget(contrib, "xref_aff", 0))

                if aff:
                    author_dict['affiliation'] = aff

                authors.append(models.AuthorMeta(**author_dict))

        return authors

    def _get_original_language(data: dict) -> str:
        """Return the language of the main article."""
        return _nestget(data, "article", 0, "lang", 0)

    def _get_languages(data: dict) -> List[str]:
        """Return the original language followed by each sub-article language."""

        languages = [_get_original_language(data)]

        for sub_article in _nestget(data, "sub_article"):
            languages.append(_nestget(sub_article, "article", 0, "lang", 0))

        return languages

    def _get_translated_titles(data: dict) -> Generator:
        """Yield the translated titles of the article.

        Titles come from ``data['display_format']['article_title']``
        (skipping the original language); when that key is missing they
        are taken from the sub-articles instead.
        """
        try:
            _lang = _get_original_language(data)
            for lang, title in data['display_format']['article_title'].items():
                if _lang != lang:
                    yield models.TranslatedTitle(**{
                        "name": title,
                        "language": lang,
                    })
        except KeyError:
            for sub_article in _nestget(data, "sub_article"):
                yield models.TranslatedTitle(
                    **{
                        "name":
                        _nestget(sub_article, "article_meta", 0,
                                 "article_title", 0),
                        "language":
                        _nestget(sub_article, "article", 0, "lang", 0),
                    })

    def _get_translated_sections(data: dict) -> List[models.TranslatedSection]:
        """Return the section of the main article and of each sub-article."""

        sections = [
            models.TranslatedSection(
                **{
                    "name": _nestget(data, "article_meta", 0, "pub_subject",
                                     0),
                    "language": _get_original_language(data),
                })
        ]

        for sub_article in _nestget(data, "sub_article"):
            sections.append(
                models.TranslatedSection(
                    **{
                        "name":
                        _nestget(sub_article, "article_meta", 0, "pub_subject",
                                 0),
                        "language":
                        _nestget(sub_article, "article", 0, "lang", 0),
                    }))

        return sections

    def _get_abstracts(data: dict) -> List[models.Abstract]:
        """Return every non-empty abstract of the article."""

        abstracts = []

        # Abstract of the original text
        if len(_nestget(data, "article_meta", 0, "abstract", 0)) > 0:
            abstracts.append(
                models.Abstract(
                    **{
                        "text": _nestget(data, "article_meta", 0, "abstract",
                                         0),
                        "language": _get_original_language(data),
                    }))

        # Trans abstracts
        abstracts += [
            models.Abstract(
                **{
                    "text": _nestget(trans_abstract, "text", 0),
                    "language": _nestget(trans_abstract, "lang", 0),
                }) for trans_abstract in data.get("trans_abstract", [])
            if trans_abstract and _nestget(trans_abstract, "text", 0)
        ]

        # Sub-article abstracts
        abstracts += [
            models.Abstract(
                **{
                    "text": _nestget(sub_article, "article_meta", 0,
                                     "abstract", 0),
                    "language": _nestget(sub_article, "article", 0, "lang", 0),
                }) for sub_article in _nestget(data, "sub_article")
            if len(_nestget(sub_article, "article_meta", 0, "abstract", 0)) > 0
        ]

        return abstracts

    def _get_keywords(data: dict) -> List[models.ArticleKeyword]:
        """Return the keyword groups of the article and of its sub-articles."""

        keywords = [
            models.ArticleKeyword(
                **{
                    "keywords": _nestget(kwd_group, "kwd", default=[]),
                    "language": _nestget(kwd_group, "lang", 0),
                }) for kwd_group in _nestget(data, "kwd_group", default=[])
        ]

        for sub_article in _nestget(data, "sub_article"):
            keywords.extend(
                models.ArticleKeyword(
                    **{
                        "keywords": _nestget(kwd_group, "kwd", default=[]),
                        "language": _nestget(kwd_group, "lang", 0),
                    })
                for kwd_group in _nestget(sub_article, "kwd_group", default=[]))

        return keywords

    def _get_order(document_order, pid_v2):
        """Return ``document_order`` as ``int``.

        Falls back to the last 5 characters of ``pid_v2`` when
        ``document_order`` is not convertible; raises
        ``InvalidOrderValueError`` when the fallback fails too.
        """
        try:
            return int(document_order)
        except (ValueError, TypeError):
            order_err_msg = ("'{}' is not a valid value for "
                             "'article.order'".format(document_order))
            try:
                document_order = int(pid_v2[-5:])
                logging.exception(
                    "{}. It was set '{} (the last 5 digits of PID v2)' to "
                    "'article.order'".format(order_err_msg, document_order))
                return document_order
            except (ValueError, TypeError):
                raise InvalidOrderValueError(order_err_msg)

    def _update_related_articles(article, related_dict):
        """
        Update the related documents.

        The relation is recorded on both sides: the related document gets
        an entry pointing to ``article`` and ``article`` gets an entry
        pointing to the related document.

        The related document is looked up in the OPAC database by DOI
        (case-insensitive, public articles only) so that its id can be
        stored in the relation. The related document is saved here when a
        new entry is added to it; ``article`` is only modified in memory.

        article = the models.Article instance currently being processed

        related_dict = {
                        "doi" : "10.1590/S0103-50532006000200015",
                        "related_type" : "retraction"
                        }

        ``related_dict`` receives a ``ref_id`` key when the related
        document is found.
        """

        related_article = None

        related_doi = related_dict.get('doi')

        article_data = {
            "ref_id": article._id,
            "doi": article.doi,
            "related_type": article.type,
        }

        if related_doi:
            try:
                # ``doi__iexact`` matches DOIs regardless of letter case
                related_article = models.Article.objects.get(
                    doi__iexact=related_doi, is_public=True)
            except models.Article.MultipleObjectsReturned:
                # Same case-insensitive lookup as above: a case-sensitive
                # filter could match fewer (or no) documents than the query
                # that reported the duplicates.
                articles = models.Article.objects.filter(
                    doi__iexact=related_doi, is_public=True)

                logging.info(
                    "Foram encontrados na base de dados do site mais de 1 artigo com o DOI: %s. Lista de ID de artigos encontrados: %s"
                    % (related_doi, [d.id for d in articles]))

                # With more than one match, the first one found is used.
                first_found = articles[0]

                logging.info(
                    "Para essa relação foi considerado o primeiro encontrado, artigo com id: %s"
                    % first_found.id)
                related_article = first_found
            except models.Article.DoesNotExist as ex:
                logging.error(
                    "Não foi possível encontrar na base de dados do site o artigo com DOI: %s, portanto, não foi possível atualiza o related_articles do relacionado, com os dados: %s, erro: %s"
                    % (related_doi, article_data, ex))

            if related_article:

                related_article_model = models.RelatedArticle(**article_data)

                # Keep the relation unique.
                if related_article_model not in related_article.related_articles:
                    # The related document must point back to ``article``.
                    related_article.related_articles += [related_article_model]
                    related_article.save()

                # Store the id of the related document in ``related_dict``
                related_dict['ref_id'] = related_article._id

                article_related_model = models.RelatedArticle(**related_dict)

                # Keep the relation unique.
                if article_related_model not in article.related_articles:
                    article.related_articles += [article_related_model]
                    logging.info(
                        "Relacionamento entre o documento processado: %s e seu relacionado: %s, realizado com sucesso. Tipo de relação entre os documentos: %s"
                        % (article.doi, related_dict.get('doi'),
                           related_dict.get('related_type')))

    def _get_publication_date_by_type(publication_dates,
                                      date_type="pub",
                                      reverse_date=True):
        """
        Build a date string from the publication dates of the Kernel
        /front, which come in the following format, for example:

        [{'text': ['2022'],
          'pub_type': [],
          'pub_format': ['electronic'],
          'date_type': ['collection'],
          'day': [],
          'month': [],
          'year': ['2022'],
          'season': []},
        {'text': ['02 02 2022'],
         'pub_type': [],
         'pub_format': ['electronic'],
         'date_type': ['pub'],
         'day': ['02'],
         'month': ['02'],
         'year': ['2022'],
         'season': []}]

        The entry whose ``date_type`` contains ``date_type`` is used (the
        last one when several match). The parts present are joined with
        ``-`` as year-month-day when ``reverse_date`` is true, otherwise
        as day-month-year.

        Returns a string.
        """
        def _check_date_format(date_string, date_format="%Y-%m-%d"):
            """
            Normalise ``date_string`` through ``date_format``; when it does
            not match, log it and return the string unchanged.
            """
            try:
                return datetime.strptime(date_string,
                                         date_format).strftime(date_format)
            except ValueError:
                logging.info(
                    "The date isnt in a well format, the correct format: %s" %
                    date_format)

            return date_string

        try:
            formed_date = ""
            for pubdate in publication_dates or []:
                if date_type in pubdate.get('date_type'):
                    pubdate_list = [
                        _nestget(pubdate, 'day', 0),
                        _nestget(pubdate, 'month', 0),
                        _nestget(pubdate, 'year', 0)
                    ]
                    if reverse_date:
                        pubdate_list.reverse()
                    # Missing parts (empty strings) are left out
                    formed_date = "-".join(
                        [pub for pub in pubdate_list if pub])
            return _check_date_format(
                formed_date) if reverse_date else _check_date_format(
                    formed_date, "%d-%m-%Y")
        except (IndexError, AttributeError):
            raise KernelFrontHasNoPubYearError(
                "Missing publication date type: {} in list of dates: {}".
                format(date_type, publication_dates))

    def _get_related_articles(xml):
        """
        Read the related documents from the XML and update both sides of
        each relation.

        XML tag that represents the relation:
            <related-article ext-link-type="doi" id="ra1"
            related-article-type="corrected-article"
            xlink:href="10.1590/S0103-50532006000200015"/>
        """

        try:
            etree_xml = et.XML(xml)
        except ValueError as ex:
            logging.error("Erro ao tentar analisar(parser) do XML, erro: %s",
                          ex)
        else:

            sps_package = SPS_Package(etree_xml)

            for related_dict in sps_package.related_articles:
                _update_related_articles(article, related_dict)

    # Computed once; reused below for the ``htmls`` compatibility field.
    languages = _get_languages(data)

    article.authors = list(_get_article_authors(data))
    article.authors_meta = _get_article_authors_meta(data)
    article.languages = list(languages)
    article.translated_titles = list(_get_translated_titles(data))
    article.sections = list(_get_translated_sections(data))
    article.abstracts = list(_get_abstracts(data))
    article.keywords = list(_get_keywords(data))

    article.abstract_languages = [
        abstract["language"] for abstract in article.abstracts
    ]

    article.original_language = _get_original_language(data)
    publications_date = _nestget(data, "pub_date")

    if publications_date:
        formed_publication_date = _get_publication_date_by_type(
            publications_date, "pub")
        article.publication_date = formed_publication_date

    article.type = _nestget(data, "article", 0, "type", 0)

    # Location data
    article.elocation = _nestget(data, "article_meta", 0, "pub_elocation", 0)
    article.fpage = _nestget(data, "article_meta", 0, "pub_fpage", 0)
    article.fpage_sequence = _nestget(data, "article_meta", 0, "pub_fpage_seq",
                                      0)
    article.lpage = _nestget(data, "article_meta", 0, "pub_lpage", 0)

    # This reads the issue the article had BEFORE it is re-linked below, so
    # the URL segments of an "ahead" issue are captured before they change.
    if article.issue is not None and article.issue.number == "ahead":
        if article.aop_url_segs is None:
            url_segs = {
                "url_seg_article": article.url_segment,
                "url_seg_issue": article.issue.url_segment,
            }
            article.aop_url_segs = models.AOPUrlSegments(**url_segs)

    # Linked issue
    issue = models.Issue.objects.get(_id=issue_id)

    logging.info("ISSUE %s" % str(issue))
    logging.info("ARTICLE.ISSUE %s" % str(article.issue))
    logging.info("ARTICLE.AOP_PID %s" % str(article.aop_pid))
    logging.info("ARTICLE.PID %s" % str(article.pid))

    article.issue = issue
    article.journal = issue.journal
    article.order = _get_order(document_order, article.pid)
    article.xml = document_xml_url

    # Corrections, retractions and addenda carry related articles.
    if article.type in ["correction", "retraction", "addendum"]:
        if fetch_document_xml is None:
            # The parameter is optional; without it the XML cannot be
            # fetched, so the relation step is skipped instead of failing
            # with "'NoneType' object is not callable".
            logging.error(
                "fetch_document_xml was not provided; related articles of "
                "document '%s' (type '%s') were not updated", document_id,
                article.type)
        else:
            # Fetch the document XML from the Kernel
            xml = fetch_document_xml(document_id)
            _get_related_articles(xml)

    # OPAC compatibility field
    article.htmls = [{"lang": lang} for lang in languages]

    article.created = article.created or datetime.utcnow().isoformat()
    article.updated = datetime.utcnow().isoformat()

    return article
def register_document(data, issue_id, document_id, i_documents):
    """
    Build an ``Article`` from the document front in ``data``, link it to
    the issue identified by ``issue_id`` (and to that issue's journal),
    save it and return it.

    The article order is the position of ``document_id`` in the list that
    ``i_documents`` holds for the issue.

    This function may raise `models.Issue.DoesNotExist`.
    """
    def nestget(node, *path, default=""):
        """
        Follow ``path`` through nested lists or dicts; return ``default``
        on the first missing key or index.
        """
        for step in path:
            try:
                node = node[step]
            except (KeyError, IndexError):
                return default
        return node

    contributor_types = (
        "author",
        "editor",
        "organizer",
        "translator",
        "autor",
        "compiler",
    )

    # NOTE(review): when one of these is missing, nestget returns "" and
    # the keyed lookups made on it below raise TypeError (not caught).
    front = nestget(data, "article", 0)
    meta = nestget(data, "article_meta", 0)
    first_pub_date = nestget(data, "pub_date", 0)

    document = models.Article()

    document.title = nestget(meta, "article_title", 0)
    document.section = nestget(meta, "pub_subject", 0)

    # "surname, given names" for every contrib of an accepted type
    document.authors = [
        "%s, %s" % (
            nestget(person, "contrib_surname", 0),
            nestget(person, "contrib_given_names", 0),
        ) for person in nestget(data, "contrib")
        if nestget(person, "contrib_type", 0) in contributor_types
    ]

    document.abstract = nestget(meta, "abstract", 0)

    # The first publisher id is the document id; the second one is the pid
    main_id = nestget(meta, "article_publisher_id", 0)
    document._id = main_id
    document.aid = main_id
    document.pid = nestget(meta, "article_publisher_id", 1)
    document.doi = nestget(meta, "article_doi", 0)

    source_lang = nestget(front, "lang", 0)

    # Original language first, then one entry per translation
    all_languages = [source_lang]
    titles = []
    sections = [
        models.TranslatedSection(
            name=nestget(meta, "pub_subject", 0),
            language=source_lang,
        )
    ]
    abstracts = [
        models.Abstract(text=document.abstract, language=source_lang)
    ]

    abstracts.extend(
        models.Abstract(
            text=nestget(entry, "text", 0),
            language=nestget(entry, "lang", 0),
        ) for entry in data.get("trans_abstract") or [])

    for sub in nestget(data, "sub_article"):
        sub_lang = nestget(sub, "article", 0, "lang", 0)
        all_languages.append(sub_lang)

        titles.append(
            models.TranslatedTitle(
                name=nestget(sub, "article_meta", 0, "article_title", 0),
                language=sub_lang,
            ))
        sections.append(
            models.TranslatedSection(
                name=nestget(sub, "article_meta", 0, "pub_subject", 0),
                language=sub_lang,
            ))
        abstracts.append(
            models.Abstract(
                text=nestget(sub, "article_meta", 0, "abstract_p", 0),
                language=sub_lang,
            ))

    keyword_groups = [
        models.ArticleKeyword(
            keywords=nestget(group, "kwd", default=[]),
            language=nestget(group, "lang", 0),
        ) for group in data.get("kwd_group") or []
    ]

    document.languages = all_languages
    document.translated_titles = titles
    document.sections = sections
    document.abstracts = abstracts
    document.keywords = keyword_groups
    document.abstract_languages = [item["language"] for item in abstracts]

    document.original_language = source_lang
    document.publication_date = nestget(first_pub_date, "text", 0)

    document.type = nestget(front, "type", 0)
    document.elocation = nestget(meta, "pub_elocation", 0)
    document.fpage = nestget(meta, "pub_fpage", 0)
    document.fpage_sequence = nestget(meta, "pub_fpage_seq", 0)
    document.lpage = nestget(meta, "pub_lpage", 0)

    issue = models.Issue.objects.get(_id=issue_id)
    document.issue = issue
    document.journal = issue.journal

    issue_documents = i_documents.get(issue.id)
    document.order = issue_documents.index(document_id)
    document.xml = "%s/documents/%s" % (api_hook.base_url, document._id)

    document.save()

    return document