def makeOneArticle(attrib=None):  # noqa
    """Build and save an ``Article`` populated with default values.

    The mandatory attributes ``_id``, ``aid`` and ``is_public`` (plus a few
    other fields) receive defaults; every key present in ``attrib`` then
    overrides the corresponding default.

    ``attrib['journal']`` and ``attrib['issue']`` may each be absent/falsy
    (a new one is created), an id string (looked up, and created with that
    id when the lookup finds nothing) or an instance of the matching model.
    Any other value raises ``ValueError``.

    Returns the value returned by ``models.Article(**fields).save()``.
    """
    overrides = attrib or {}

    article_id = overrides.get('_id', str(uuid4().hex))
    pid = overrides.get('pid', '0000-00000000000000000')
    title = "article-%s" % article_id
    domain_key = "article-domain_key-%s" % article_id

    # Resolve the journal first: the issue lookup below depends on it.
    journal = overrides.get('journal', None)
    if not journal:
        journal = makeOneJournal()
    elif isinstance(journal, str):
        # The given value is the journal id.
        journal_id = journal
        try:
            journal = models.Journal.objects.get(_id=journal_id)
        except models.Journal.DoesNotExist:
            journal = makeOneJournal({'_id': journal_id})
    elif not isinstance(journal, models.Journal):
        raise ValueError('WTF is journal?')

    issue = overrides.get('issue', None)
    if not issue:
        issue = makeOneIssue({'journal': journal})
    elif isinstance(issue, str):
        # The given value is the issue id.
        issue_id = issue
        try:
            issue = models.Issue.objects.get(_id=issue_id, journal=journal.id)
        except models.Issue.DoesNotExist:
            issue = makeOneIssue({'_id': issue_id, 'journal': journal.id})
    elif not isinstance(issue, models.Issue):
        raise ValueError('WTF is issue?')

    fields = {
        '_id': article_id,
        'aid': article_id,
        'title': overrides.get('title', title),
        'domain_key': overrides.get('domain_key', domain_key),
        'is_aop': overrides.get('is_aop', False),
        'is_public': overrides.get('is_public', True),
        'created': overrides.get('created', datetime.datetime.now()),
        'updated': overrides.get('updated', datetime.datetime.now()),
        'issue': issue.id,
        'journal': journal.id,
        'pid': pid,
        'original_language': overrides.get('original_language', 'pt'),
        'fpage': overrides.get('fpage', '15'),
        'lpage': overrides.get('lpage', '16'),
        'translated_titles': overrides.get('translated_titles', []),
        'languages': overrides.get('languages', ['pt']),
    }
    # Caller-supplied values win over every default, including the
    # ``issue``/``journal`` entries set above.
    fields.update(overrides)

    return models.Article(**fields).save()
def ArticleFactory(
    document_id: str,
    data: dict,
    issue_id: str,
    document_order: int,
    document_xml_url: str,
) -> models.Article:
    """Build an article instance from the input data.

    The contents of ``data`` are adapted to the format required by the
    ``Article`` model of the OPAC Schema. An article already stored under
    ``document_id`` is loaded and updated; otherwise a new instance is
    created. This function does not call ``save()`` on the article.

    Args:
        document_id (str): Document identifier, used as ``_id`` and ``aid``.
        data (dict): Structure holding the document ``front``.
        issue_id (str): Issue identifier. When ``None`` and the article
            already exists, the id of its current issue is used.
        document_order (int): Position of the article. A falsy value leaves
            ``article.order`` untouched.
        document_xml_url (str): URL of the article XML.

    Returns:
        models.Article: Article instance in the OPAC data model.
    """

    def _nestget(data, *path, default=""):
        """Walk ``path`` through nested lists/dicts; ``default`` on a miss."""
        for key_or_index in path:
            try:
                data = data[key_or_index]
            except (KeyError, IndexError):
                return default
        return data

    AUTHOR_CONTRIB_TYPES = (
        "author",
        "editor",
        "organizer",
        "translator",
        "autor",
        "compiler",
    )

    def _get_article_authors(data) -> Generator:
        """Yield the article authors as ``"surname, given names"``."""
        for contrib in _nestget(data, "contrib"):
            if _nestget(contrib, "contrib_type", 0) in AUTHOR_CONTRIB_TYPES:
                yield "%s, %s" % (
                    _nestget(contrib, "contrib_surname", 0),
                    _nestget(contrib, "contrib_given_names", 0),
                )

    def _get_original_language(data: dict) -> str:
        """Return the language of the main article."""
        return _nestget(data, "article", 0, "lang", 0)

    def _get_languages(data: dict) -> List[str]:
        """Return the original language followed by each sub-article's."""
        languages = [_get_original_language(data)]
        languages.extend(
            _nestget(sub_article, "article", 0, "lang", 0)
            for sub_article in _nestget(data, "sub_article")
        )
        return languages

    def _get_translated_titles(data: dict) -> Generator:
        """Yield one translated title per sub-article."""
        for sub_article in _nestget(data, "sub_article"):
            yield models.TranslatedTitle(
                name=_nestget(sub_article, "article_meta", 0, "article_title", 0),
                language=_nestget(sub_article, "article", 0, "lang", 0),
            )

    def _get_translated_sections(data: dict) -> List[models.TranslatedSection]:
        """Return the section of the main article and of each sub-article."""
        sections = [
            models.TranslatedSection(
                name=_nestget(data, "article_meta", 0, "pub_subject", 0),
                language=_get_original_language(data),
            )
        ]
        for sub_article in _nestget(data, "sub_article"):
            sections.append(
                models.TranslatedSection(
                    name=_nestget(sub_article, "article_meta", 0, "pub_subject", 0),
                    language=_nestget(sub_article, "article", 0, "lang", 0),
                )
            )
        return sections

    def _get_abstracts(data: dict) -> List[models.Abstract]:
        """Return the main, translated and sub-article abstracts, in order."""
        abstracts = [
            models.Abstract(
                text=_nestget(data, "article_meta", 0, "abstract", 0),
                language=_get_original_language(data),
            )
        ]
        # ``or []`` also covers a key that is present but holds ``None``.
        for trans_abstract in data.get("trans_abstract") or []:
            abstracts.append(
                models.Abstract(
                    text=_nestget(trans_abstract, "text", 0),
                    language=_nestget(trans_abstract, "lang", 0),
                )
            )
        for sub_article in _nestget(data, "sub_article"):
            abstracts.append(
                models.Abstract(
                    text=_nestget(sub_article, "article_meta", 0, "abstract", 0),
                    language=_nestget(sub_article, "article", 0, "lang", 0),
                )
            )
        return abstracts

    def _keyword_groups(node) -> List[models.ArticleKeyword]:
        """Return one ``ArticleKeyword`` per ``kwd_group`` entry of ``node``."""
        return [
            models.ArticleKeyword(
                keywords=_nestget(kwd_group, "kwd", default=[]),
                language=_nestget(kwd_group, "lang", 0),
            )
            for kwd_group in _nestget(node, "kwd_group", default=[])
        ]

    def _get_keywords(data: dict) -> List[models.ArticleKeyword]:
        """Return the keywords of the article and of its sub-articles."""
        keywords = _keyword_groups(data)
        for sub_article in _nestget(data, "sub_article"):
            keywords.extend(_keyword_groups(sub_article))
        return keywords

    try:
        article = models.Article.objects.get(_id=document_id)

        if issue_id is None:
            issue_id = article.issue._id
    except models.Article.DoesNotExist:
        article = models.Article()

    # Main data
    article.title = _nestget(data, "article_meta", 0, "article_title", 0)
    article.section = _nestget(data, "article_meta", 0, "pub_subject", 0)
    article.abstract = _nestget(data, "article_meta", 0, "abstract", 0)

    # Identifiers
    article._id = document_id
    article.aid = document_id

    # SciELO PIDs (v1..v3) found in article_meta; missing versions are left out
    scielo_pids = {}
    for version in range(1, 4):
        pid = _nestget(
            data, "article_meta", 0, f"scielo_pid_v{version}", 0, default=None
        )
        if pid is not None:
            scielo_pids[f"v{version}"] = pid
    article.scielo_pids = scielo_pids

    article.doi = _nestget(data, "article_meta", 0, "article_doi", 0)

    # Computed once and reused for ``languages`` and ``htmls`` below.
    languages = _get_languages(data)

    article.authors = list(_get_article_authors(data))
    article.languages = list(languages)
    article.translated_titles = list(_get_translated_titles(data))
    # NOTE(review): the other article builders in this file assign this list
    # to ``sections``; confirm ``trans_sections`` is the intended field here.
    article.trans_sections = list(_get_translated_sections(data))
    article.abstracts = list(_get_abstracts(data))
    article.keywords = list(_get_keywords(data))
    article.abstract_languages = [
        abstract["language"] for abstract in article.abstracts
    ]
    article.original_language = _get_original_language(data)
    article.publication_date = _nestget(data, "pub_date", 0, "text", 0)

    article.type = _nestget(data, "article", 0, "type", 0)

    # Location data
    article.elocation = _nestget(data, "article_meta", 0, "pub_elocation", 0)
    article.fpage = _nestget(data, "article_meta", 0, "pub_fpage", 0)
    article.fpage_sequence = _nestget(data, "article_meta", 0, "pub_fpage_seq", 0)
    article.lpage = _nestget(data, "article_meta", 0, "pub_lpage", 0)

    # Must run before the issue is replaced below: it records the URL
    # segments of the "ahead" issue the article currently belongs to.
    if article.issue is not None and article.issue.number == "ahead":
        if article.aop_url_segs is None:
            article.aop_url_segs = models.AOPUrlSegments(
                url_seg_article=article.url_segment,
                url_seg_issue=article.issue.url_segment,
            )

    # Linked issue
    issue = models.Issue.objects.get(_id=issue_id)
    article.issue = issue
    article.journal = issue.journal

    # NOTE(review): a falsy order (``0`` or ``None``) is skipped here, so the
    # existing ``article.order`` is kept; confirm 0 is never a valid position.
    if document_order:
        article.order = int(document_order)
    article.xml = document_xml_url

    # OPAC compatibility field
    article.htmls = [{"lang": lang} for lang in languages]

    return article
def ArticleFactory(
    document_id: str,
    data: dict,
    issue_id: str,
    document_order: int,
    document_xml_url: str,
    repeated_doc_pids=None,
    fetch_document_xml: callable = None,
) -> models.Article:
    """Build an article instance from the input data.

    The contents of ``data`` are adapted to the format required by the
    ``Article`` model of the OPAC Schema. An article already stored under
    ``document_id`` is loaded and updated; otherwise a new instance is
    created. This function does not call ``save()`` on the returned article,
    but it may save *related* articles when the document is a correction,
    retraction or addendum.

    Args:
        document_id (str): Document identifier, used as ``_id`` and ``aid``.
        data (dict): Structure holding the document ``front``.
        issue_id (str): Issue identifier. When ``None`` and the article
            already exists, the id of its current issue is used.
        document_order (int): Position of the article. When it cannot be
            converted to ``int``, the last 5 characters of the PID v2 are
            used instead.
        document_xml_url (str): URL of the article XML.
        repeated_doc_pids: Optional list of extra PIDs; merged with the
            ``article_publisher_id`` values and stored under
            ``scielo_pids["other"]``.
        fetch_document_xml (callable): Function that receives
            ``document_id`` and returns the document XML. Only used for
            corrections, retractions and addenda; when it is not callable
            the related-articles update is skipped and an error is logged.

    Returns:
        models.Article: Article instance in the OPAC data model.

    Raises:
        InvalidOrderValueError: Neither ``document_order`` nor the PID v2
            suffix can be converted to ``int``.
        KernelFrontHasNoPubYearError: The publication dates raise
            ``IndexError``/``AttributeError`` while being read.
    """

    AUTHOR_CONTRIB_TYPES = (
        "author",
        "editor",
        "organizer",
        "translator",
        "autor",
        "compiler",
    )

    def _format_author_name(contrib) -> str:
        """Return ``"surname[ suffix], given names"`` for a contrib entry."""
        suffix = _nestget(contrib, "contrib_suffix", 0)
        return "%s%s, %s" % (
            _nestget(contrib, "contrib_surname", 0),
            " " + suffix if suffix else "",
            _nestget(contrib, "contrib_given_names", 0),
        )

    def _get_article_authors(data) -> Generator:
        """Yield the formatted name of each author of the article."""
        for contrib in _nestget(data, "contrib"):
            if _nestget(contrib, "contrib_type", 0) in AUTHOR_CONTRIB_TYPES:
                yield _format_author_name(contrib)

    def _get_author_affiliation(data, xref_aff_id):
        """Return ``institution_orgname`` of the aff matching ``xref_aff_id``.

        Returns ``None`` when no affiliation matches.
        """
        for aff in _nestget(data, "aff"):
            if _nestget(aff, "aff_id", 0) == xref_aff_id:
                return _nestget(aff, "institution_orgname", 0)

    def _get_article_authors_meta(data):
        """Return one ``AuthorMeta`` per author.

        Each entry carries the name plus, when available, the ORCID and the
        affiliation.
        """
        authors = []
        for contrib in _nestget(data, "contrib"):
            if _nestget(contrib, "contrib_type", 0) not in AUTHOR_CONTRIB_TYPES:
                continue

            author_dict = {"name": _format_author_name(contrib)}

            orcid = _nestget(contrib, "contrib_orcid", 0)
            if orcid:
                author_dict["orcid"] = orcid

            aff = _get_author_affiliation(
                data, _nestget(contrib, "xref_aff", 0))
            if aff:
                author_dict["affiliation"] = aff

            authors.append(models.AuthorMeta(**author_dict))
        return authors

    def _get_original_language(data: dict) -> str:
        """Return the language of the main article."""
        return _nestget(data, "article", 0, "lang", 0)

    def _get_languages(data: dict) -> List[str]:
        """Return the original language followed by each sub-article's."""
        languages = [_get_original_language(data)]
        languages.extend(
            _nestget(sub_article, "article", 0, "lang", 0)
            for sub_article in _nestget(data, "sub_article")
        )
        return languages

    def _get_translated_titles(data: dict) -> Generator:
        """Yield the translated titles of the article.

        Titles come from ``data['display_format']['article_title']`` (every
        language except the original one); when that path is missing they
        are read from the sub-articles instead.
        """
        try:
            _lang = _get_original_language(data)
            for lang, title in data['display_format']['article_title'].items():
                if _lang != lang:
                    yield models.TranslatedTitle(name=title, language=lang)
        except KeyError:
            for sub_article in _nestget(data, "sub_article"):
                yield models.TranslatedTitle(
                    name=_nestget(
                        sub_article, "article_meta", 0, "article_title", 0),
                    language=_nestget(sub_article, "article", 0, "lang", 0),
                )

    def _get_translated_sections(data: dict) -> List[models.TranslatedSection]:
        """Return the section of the main article and of each sub-article."""
        sections = [
            models.TranslatedSection(
                name=_nestget(data, "article_meta", 0, "pub_subject", 0),
                language=_get_original_language(data),
            )
        ]
        for sub_article in _nestget(data, "sub_article"):
            sections.append(
                models.TranslatedSection(
                    name=_nestget(
                        sub_article, "article_meta", 0, "pub_subject", 0),
                    language=_nestget(sub_article, "article", 0, "lang", 0),
                )
            )
        return sections

    def _get_abstracts(data: dict) -> List[models.Abstract]:
        """Return every non-empty abstract of the article."""
        abstracts = []

        # Abstract of the original text
        original_abstract = _nestget(data, "article_meta", 0, "abstract", 0)
        if original_abstract:
            abstracts.append(
                models.Abstract(
                    text=original_abstract,
                    language=_get_original_language(data),
                )
            )

        # Translated abstracts; ``or []`` also covers a key holding ``None``
        for trans_abstract in data.get("trans_abstract") or []:
            if trans_abstract and _nestget(trans_abstract, "text", 0):
                abstracts.append(
                    models.Abstract(
                        text=_nestget(trans_abstract, "text", 0),
                        language=_nestget(trans_abstract, "lang", 0),
                    )
                )

        # Sub-article abstracts
        for sub_article in _nestget(data, "sub_article"):
            sub_abstract = _nestget(
                sub_article, "article_meta", 0, "abstract", 0)
            if sub_abstract:
                abstracts.append(
                    models.Abstract(
                        text=sub_abstract,
                        language=_nestget(
                            sub_article, "article", 0, "lang", 0),
                    )
                )

        return abstracts

    def _keyword_groups(node) -> List[models.ArticleKeyword]:
        """Return one ``ArticleKeyword`` per ``kwd_group`` entry of ``node``."""
        return [
            models.ArticleKeyword(
                keywords=_nestget(kwd_group, "kwd", default=[]),
                language=_nestget(kwd_group, "lang", 0),
            )
            for kwd_group in _nestget(node, "kwd_group", default=[])
        ]

    def _get_keywords(data: dict) -> List[models.ArticleKeyword]:
        """Return the keywords of the article and of its sub-articles."""
        keywords = _keyword_groups(data)
        for sub_article in _nestget(data, "sub_article"):
            keywords.extend(_keyword_groups(sub_article))
        return keywords

    def _get_order(document_order, pid_v2):
        """Return the article order as ``int``.

        Falls back to the last 5 characters of ``pid_v2`` when
        ``document_order`` is not a valid integer.
        """
        try:
            return int(document_order)
        except (ValueError, TypeError):
            order_err_msg = ("'{}' is not a valid value for "
                             "'article.order'".format(document_order))
            try:
                document_order = int(pid_v2[-5:])
                logging.exception(
                    "{}. It was set '{} (the last 5 digits of PID v2)' to "
                    "'article.order'".format(order_err_msg, document_order))
                return document_order
            except (ValueError, TypeError):
                raise InvalidOrderValueError(order_err_msg)

    def _update_related_articles(article, related_dict):
        """Update the related documents on both sides of the relation.

        ``related_articles`` is updated on both documents, i.e.
        ``["correction", "retraction", "addendum"] -> document`` as well as
        ``document -> ["correction", "retraction", "addendum"]``.

        The OPAC database is searched by DOI to find the related document,
        whose ``_id`` is stored as ``ref_id`` in the relation.

        article: the ``models.Article`` instance being processed; its
            ``related_articles`` attribute is changed.
        related_dict: e.g.
            {
                "doi": "10.1590/S0103-50532006000200015",
                "related_type": "retraction",
            }
            A ``ref_id`` key is added to it when the related article is
            found.
        """
        related_article = None
        related_doi = related_dict.get('doi')

        article_data = {
            "ref_id": article._id,
            "doi": article.doi,
            "related_type": article.type,
        }

        if not related_doi:
            return

        try:
            # ``doi__iexact`` matches DOIs regardless of letter case
            related_article = models.Article.objects.get(
                doi__iexact=related_doi, is_public=True)
        except models.Article.MultipleObjectsReturned:
            # Same case-insensitive lookup as above, so every article that
            # caused the exception is part of this result.
            articles = models.Article.objects.filter(
                doi__iexact=related_doi, is_public=True)

            logging.info(
                "Foram encontrados na base de dados do site mais de 1 artigo com o DOI: %s. Lista de ID de artigos encontrados: %s"
                % (related_doi, [d.id for d in articles]))

            # With more than one match, the first one found is used.
            first_found = articles[0]

            logging.info(
                "Para essa relação foi considerado o primeiro encontrado, artigo com id: %s"
                % first_found.id)

            related_article = first_found
        except models.Article.DoesNotExist as ex:
            logging.error(
                "Não foi possível encontrar na base de dados do site o artigo com DOI: %s, portanto, não foi possível atualiza o related_articles do relacionado, com os dados: %s, erro: %s"
                % (related_doi, article_data, ex))

        if not related_article:
            return

        related_article_model = models.RelatedArticle(**article_data)

        # Keep the relation unique on the related article's side.
        if related_article_model not in related_article.related_articles:
            related_article.related_articles += [related_article_model]
            related_article.save()

        # Point ``ref_id`` of the relation to the article that was found.
        related_dict['ref_id'] = related_article._id

        article_related_model = models.RelatedArticle(**related_dict)

        # Keep the relation unique on the processed article's side.
        if article_related_model not in article.related_articles:
            article.related_articles += [article_related_model]
            logging.info(
                "Relacionamento entre o documento processado: %s e seu relacionado: %s, realizado com sucesso. Tipo de relação entre os documentos: %s"
                % (article.doi, related_dict.get('doi'),
                   related_dict.get('related_type')))

    def _get_publication_date_by_type(publication_dates,
                                      date_type="pub",
                                      reverse_date=True):
        """Return, as a string, the publication date of type ``date_type``.

        ``publication_dates`` is the list of dates from the kernel front,
        for example:

            [{'text': ['2022'], 'pub_type': [], 'pub_format': ['electronic'],
              'date_type': ['collection'], 'day': [], 'month': [],
              'year': ['2022'], 'season': []},
             {'text': ['02 02 2022'], 'pub_type': [],
              'pub_format': ['electronic'], 'date_type': ['pub'],
              'day': ['02'], 'month': ['02'], 'year': ['2022'],
              'season': []}]

        The parts are joined with ``-`` as year-month-day when
        ``reverse_date`` is true, day-month-year otherwise; missing parts
        are dropped. When several entries match, the last one wins; when
        none matches, an empty string is returned.
        """

        def _check_date_format(date_string, date_format="%Y-%m-%d"):
            """Normalise ``date_string``; return it unchanged if it does not parse."""
            try:
                return datetime.strptime(
                    date_string, date_format).strftime(date_format)
            except ValueError:
                logging.info(
                    "The date isnt in a well format, the correct format: %s"
                    % date_format)
            return date_string

        try:
            formed_date = ""
            for pubdate in publication_dates or []:
                # ``or []`` keeps a missing ``date_type`` key from raising.
                if date_type in (pubdate.get('date_type') or []):
                    pubdate_list = [
                        _nestget(pubdate, 'day', 0),
                        _nestget(pubdate, 'month', 0),
                        _nestget(pubdate, 'year', 0),
                    ]
                    if reverse_date:
                        pubdate_list.reverse()
                    formed_date = "-".join(
                        [pub for pub in pubdate_list if pub])
            if reverse_date:
                return _check_date_format(formed_date)
            return _check_date_format(formed_date, "%d-%m-%Y")
        except (IndexError, AttributeError):
            raise KernelFrontHasNoPubYearError(
                "Missing publication date type: {} in list of dates: {}".
                format(date_type, publication_dates))

    def _get_related_articles(xml):
        """Read the related documents from the XML and update the relation.

        The relation is represented in the XML by the ``related-article``
        element, which carries ``ext-link-type="doi"``, an ``id``, a
        ``related-article-type`` (e.g. ``corrected-article``) and the DOI in
        ``xlink:href``.
        """
        try:
            etree_xml = et.XML(xml)
        except ValueError as ex:
            logging.error("Erro ao tentar analisar(parser) do XML, erro: %s", ex)
        else:
            sps_package = SPS_Package(etree_xml)

            for related_dict in sps_package.related_articles:
                _update_related_articles(article, related_dict)

    try:
        article = models.Article.objects.get(_id=document_id)

        if issue_id is None:
            issue_id = article.issue._id
    except models.Article.DoesNotExist:
        article = models.Article()

    # Update status
    article.is_public = True

    # Main data
    article.title = _get_main_article_title(data)
    article.section = _nestget(data, "article_meta", 0, "pub_subject", 0)
    article.abstract = _nestget(data, "article_meta", 0, "abstract", 0)

    # Identifiers
    article._id = document_id
    article.aid = document_id

    # SciELO PIDs (v1..v3) found in article_meta; missing versions are left out
    scielo_pids = {}
    for version in range(1, 4):
        pid = _nestget(
            data, "article_meta", 0, f"scielo_pid_v{version}", 0, default=None
        )
        if pid is not None:
            scielo_pids[f"v{version}"] = pid
    article.scielo_pids = scielo_pids

    # Other kinds of PIDs/IDs go into ``scielo_pids['other']``
    article_publisher_id = _nestget(
        data, "article_meta", 0, "article_publisher_id") or []
    other_pids = list(set((repeated_doc_pids or []) + article_publisher_id))
    if other_pids:
        article.scielo_pids.update({"other": other_pids})

    article.aop_pid = _nestget(data, "article_meta", 0, "previous_pid", 0)
    article.pid = article.scielo_pids.get("v2")

    article.doi = _nestget(data, "article_meta", 0, "article_doi", 0)

    article.authors = list(_get_article_authors(data))
    article.authors_meta = _get_article_authors_meta(data)
    article.languages = list(_get_languages(data))
    article.translated_titles = list(_get_translated_titles(data))
    article.sections = list(_get_translated_sections(data))
    article.abstracts = list(_get_abstracts(data))
    article.keywords = list(_get_keywords(data))
    article.abstract_languages = [
        abstract["language"] for abstract in article.abstracts
    ]
    article.original_language = _get_original_language(data)

    publications_date = _nestget(data, "pub_date")
    if publications_date:
        article.publication_date = _get_publication_date_by_type(
            publications_date, "pub")

    article.type = _nestget(data, "article", 0, "type", 0)

    # Location data
    article.elocation = _nestget(data, "article_meta", 0, "pub_elocation", 0)
    article.fpage = _nestget(data, "article_meta", 0, "pub_fpage", 0)
    article.fpage_sequence = _nestget(data, "article_meta", 0, "pub_fpage_seq", 0)
    article.lpage = _nestget(data, "article_meta", 0, "pub_lpage", 0)

    # Must run before the issue is replaced below: it records the URL
    # segments of the "ahead" issue the article currently belongs to.
    if article.issue is not None and article.issue.number == "ahead":
        if article.aop_url_segs is None:
            article.aop_url_segs = models.AOPUrlSegments(
                url_seg_article=article.url_segment,
                url_seg_issue=article.issue.url_segment,
            )

    # Linked issue
    issue = models.Issue.objects.get(_id=issue_id)

    logging.info("ISSUE %s" % str(issue))
    logging.info("ARTICLE.ISSUE %s" % str(article.issue))
    logging.info("ARTICLE.AOP_PID %s" % str(article.aop_pid))
    logging.info("ARTICLE.PID %s" % str(article.pid))

    article.issue = issue
    article.journal = issue.journal

    article.order = _get_order(document_order, article.pid)
    article.xml = document_xml_url

    # Errata, retractions and addenda carry links to the documents they
    # refer to, which can only be read from the full XML.
    if article.type in ["correction", "retraction", "addendum"]:
        if callable(fetch_document_xml):
            # Fetch the XML of the document from the kernel
            xml = fetch_document_xml(document_id)
            _get_related_articles(xml)
        else:
            # ``fetch_document_xml`` defaults to None; calling it blindly
            # would abort the whole factory with a TypeError.
            logging.error(
                "fetch_document_xml was not provided; related articles of "
                "document %s were not updated", document_id)

    # OPAC compatibility field
    article.htmls = [{"lang": lang} for lang in _get_languages(data)]

    article.created = article.created or datetime.utcnow().isoformat()
    article.updated = datetime.utcnow().isoformat()

    return article
def register_document(data, issue_id, document_id, i_documents):
    """Build an ``Article`` from a document front, save it and return it.

    This function may raise ``models.Issue.DoesNotExist``.
    """

    def nestget(data, *path, default=""):
        """Fetch a value from nested lists or dictionaries."""
        current = data
        for step in path:
            try:
                current = current[step]
            except (KeyError, IndexError):
                return default
        return current

    article = nestget(data, "article", 0)
    article_meta = nestget(data, "article_meta", 0)
    pub_date = nestget(data, "pub_date", 0)
    sub_articles = nestget(data, "sub_article")
    contribs = nestget(data, "contrib")

    document = models.Article()

    document.title = nestget(article_meta, "article_title", 0)
    document.section = nestget(article_meta, "pub_subject", 0)

    valid_contrib_types = [
        "author",
        "editor",
        "organizer",
        "translator",
        "autor",
        "compiler",
    ]
    document.authors = [
        "%s, %s" % (
            nestget(contrib, "contrib_surname", 0),
            nestget(contrib, "contrib_given_names", 0),
        )
        for contrib in contribs
        if nestget(contrib, "contrib_type", 0) in valid_contrib_types
    ]

    document.abstract = nestget(article_meta, "abstract", 0)

    # The first publisher id is used as ``_id``/``aid``, the second as ``pid``.
    publisher_id = nestget(article_meta, "article_publisher_id", 0)
    document._id = publisher_id
    document.aid = publisher_id
    document.pid = nestget(article_meta, "article_publisher_id", 1)
    document.doi = nestget(article_meta, "article_doi", 0)

    original_lang = nestget(article, "lang", 0)

    # ``languages`` holds the original language plus every translation
    languages = [original_lang]
    trans_titles = []
    trans_sections = [
        models.TranslatedSection(
            name=nestget(article_meta, "pub_subject", 0),
            language=original_lang,
        )
    ]
    trans_abstracts = [
        models.Abstract(text=document.abstract, language=original_lang)
    ]
    trans_abstracts.extend(
        models.Abstract(
            text=nestget(trans_abs, "text", 0),
            language=nestget(trans_abs, "lang", 0),
        )
        for trans_abs in data.get("trans_abstract") or []
    )

    for sub in sub_articles:
        sub_lang = nestget(sub, "article", 0, "lang", 0)
        sub_meta_path = (sub, "article_meta", 0)

        languages.append(sub_lang)
        trans_titles.append(
            models.TranslatedTitle(
                name=nestget(*sub_meta_path, "article_title", 0),
                language=sub_lang,
            )
        )
        trans_sections.append(
            models.TranslatedSection(
                name=nestget(*sub_meta_path, "pub_subject", 0),
                language=sub_lang,
            )
        )
        trans_abstracts.append(
            models.Abstract(
                text=nestget(*sub_meta_path, "abstract_p", 0),
                language=sub_lang,
            )
        )

    keywords = [
        models.ArticleKeyword(
            keywords=nestget(kwd_group, "kwd", default=[]),
            language=nestget(kwd_group, "lang", 0),
        )
        for kwd_group in data.get("kwd_group") or []
    ]

    document.languages = languages
    document.translated_titles = trans_titles
    document.sections = trans_sections
    document.abstracts = trans_abstracts
    document.keywords = keywords
    document.abstract_languages = [
        abstract["language"] for abstract in trans_abstracts
    ]

    document.original_language = original_lang
    document.publication_date = nestget(pub_date, "text", 0)

    document.type = nestget(article, "type", 0)
    document.elocation = nestget(article_meta, "pub_elocation", 0)
    document.fpage = nestget(article_meta, "pub_fpage", 0)
    document.fpage_sequence = nestget(article_meta, "pub_fpage_seq", 0)
    document.lpage = nestget(article_meta, "pub_lpage", 0)

    issue = models.Issue.objects.get(_id=issue_id)
    document.issue = issue
    document.journal = issue.journal

    # The order is the position of ``document_id`` in the issue's entry
    # of ``i_documents``.
    document.order = i_documents.get(issue.id).index(document_id)
    document.xml = "%s/documents/%s" % (api_hook.base_url, document._id)

    document.save()

    return document