def _get_keywords(data: dict) -> List[models.ArticleKeyword]:
    """Return the keywords of the article and of its sub-articles.

    One ``models.ArticleKeyword`` is built for every entry found under
    ``"kwd_group"`` in ``data`` and, after those, for every entry found under
    ``"kwd_group"`` in each item of ``data["sub_article"]``.

    Args:
        data: Parsed document data, read through ``_nestget``.

    Returns:
        The keyword groups of the main article followed by those of each
        sub-article, in the order they are found.
    """

    def _keywords_of(node):
        # Build the ArticleKeyword objects for a single article-like node.
        return [
            models.ArticleKeyword(
                keywords=_nestget(kwd_group, "kwd", default=[]),
                language=_nestget(kwd_group, "lang", 0),
            )
            for kwd_group in _nestget(node, "kwd_group", default=[])
        ]

    keywords = _keywords_of(data)

    # An explicit empty default keeps this loop a no-op when there are no
    # sub-articles, without relying on `_nestget`'s own default being iterable.
    for sub_article in _nestget(data, "sub_article", default=[]):
        keywords.extend(_keywords_of(sub_article))

    return keywords
def register_document(data, issue_id, document_id, i_documents):
    """Build a ``models.Article`` from parsed document data and save it.

    Args:
        data: Mapping with the parsed document fields. The keys read here are
            ``"article"``, ``"article_meta"``, ``"pub_date"``,
            ``"sub_article"``, ``"contrib"``, ``"trans_abstract"`` and
            ``"kwd_group"``; a missing entry falls back to an empty string.
        issue_id: Value matched against ``_id`` when fetching the
            ``models.Issue`` this document belongs to.
        document_id: Identifier whose position in the issue's document list
            becomes ``document.order``.
        i_documents: Mapping whose ``.get(issue.id)`` yields a sequence
            supporting ``.index(document_id)``.

    Returns:
        The saved ``models.Article`` instance.

    Raises:
        models.Issue.DoesNotExist: May be raised by the issue lookup.
    """

    def nestget(node, *path, default=""):
        """Walk ``path`` through nested lists/dicts; return ``default`` on a miss."""
        # NOTE(review): only KeyError/IndexError count as a miss. A TypeError
        # (e.g. indexing the "" default with a string key) propagates.
        current = node
        for step in path:
            try:
                current = current[step]
            except (KeyError, IndexError):
                return default
        return current

    article = nestget(data, "article", 0)
    article_meta = nestget(data, "article_meta", 0)
    pub_date = nestget(data, "pub_date", 0)
    sub_articles = nestget(data, "sub_article")
    contribs = nestget(data, "contrib")

    document = models.Article()
    document.title = nestget(article_meta, "article_title", 0)
    section_name = nestget(article_meta, "pub_subject", 0)
    document.section = section_name

    # Contributors whose type is not listed here are left out of `authors`.
    accepted_contrib_types = (
        "author",
        "editor",
        "organizer",
        "translator",
        "autor",
        "compiler",
    )
    document.authors = [
        "%s, %s"
        % (
            nestget(contrib, "contrib_surname", 0),
            nestget(contrib, "contrib_given_names", 0),
        )
        for contrib in contribs
        if nestget(contrib, "contrib_type", 0) in accepted_contrib_types
    ]

    document.abstract = nestget(article_meta, "abstract", 0)

    # The first publisher id is used both as primary key and as `aid`;
    # the second one becomes `pid`.
    publisher_id = nestget(article_meta, "article_publisher_id", 0)
    document._id = publisher_id
    document.aid = publisher_id
    document.pid = nestget(article_meta, "article_publisher_id", 1)
    document.doi = nestget(article_meta, "article_doi", 0)

    original_lang = nestget(article, "lang", 0)

    # `languages` holds the original language plus every translation language.
    languages = [original_lang]
    trans_titles = []
    trans_sections = [
        models.TranslatedSection(name=section_name, language=original_lang)
    ]
    trans_abstracts = [
        models.Abstract(text=document.abstract, language=original_lang)
    ]
    trans_abstracts.extend(
        models.Abstract(
            text=nestget(trans_abs, "text", 0),
            language=nestget(trans_abs, "lang", 0),
        )
        for trans_abs in data.get("trans_abstract") or []
    )

    # Each sub-article contributes one language, title, section and abstract.
    # Full paths are looked up from `sub` so that a missing intermediate key
    # still resolves to the default value.
    for sub in sub_articles:
        sub_lang = nestget(sub, "article", 0, "lang", 0)
        languages.append(sub_lang)
        trans_titles.append(
            models.TranslatedTitle(
                name=nestget(sub, "article_meta", 0, "article_title", 0),
                language=sub_lang,
            )
        )
        trans_sections.append(
            models.TranslatedSection(
                name=nestget(sub, "article_meta", 0, "pub_subject", 0),
                language=sub_lang,
            )
        )
        trans_abstracts.append(
            models.Abstract(
                text=nestget(sub, "article_meta", 0, "abstract_p", 0),
                language=sub_lang,
            )
        )

    # NOTE(review): only the top-level "kwd_group" entries are collected here;
    # keyword groups inside sub-articles are not visited. Confirm this is
    # intended.
    keywords = [
        models.ArticleKeyword(
            keywords=nestget(kwd_group, "kwd", default=[]),
            language=nestget(kwd_group, "lang", 0),
        )
        for kwd_group in data.get("kwd_group") or []
    ]

    document.languages = languages
    document.translated_titles = trans_titles
    document.sections = trans_sections
    document.abstracts = trans_abstracts
    document.keywords = keywords
    document.abstract_languages = [
        abstract["language"] for abstract in trans_abstracts
    ]
    document.original_language = original_lang
    document.publication_date = nestget(pub_date, "text", 0)
    document.type = nestget(article, "type", 0)
    document.elocation = nestget(article_meta, "pub_elocation", 0)
    document.fpage = nestget(article_meta, "pub_fpage", 0)
    document.fpage_sequence = nestget(article_meta, "pub_fpage_seq", 0)
    document.lpage = nestget(article_meta, "pub_lpage", 0)

    issue = models.Issue.objects.get(_id=issue_id)
    document.issue = issue
    document.journal = issue.journal
    # NOTE(review): there is no guard here. Assuming `i_documents` is a dict,
    # an issue without an entry makes `.get` return None and `.index` fail.
    document.order = i_documents.get(issue.id).index(document_id)
    # `api_hook` is not defined in this function; it comes from an outer scope.
    document.xml = "%s/documents/%s" % (api_hook.base_url, document._id)
    document.save()

    return document