def _check_journal_meta(self, metadata):
    """
    Check the given journal metadata and return it enriched with the
    ``code`` (list of ISSNs) and ``collection`` fields.
    """
    journal = Article({'title': metadata, 'article': {}, 'citations': {}})

    issns = set([journal.any_issn(priority=u'electronic'),
                 journal.any_issn(priority=u'print')])

    metadata['code'] = list(issns)
    metadata['collection'] = journal.collection_acronym

    return metadata
def document(self, code, collection=None, replace_journal_metadata=True,
             fmt='xylose'):
    query = {
        'code': code,
        'replace_journal_metadata': replace_journal_metadata,
        'fmt': fmt
    }

    if collection:
        query['collection'] = collection

    try:
        article = self.client.get_article(**query)
    except Exception:
        msg = 'Error retrieving document: %s_%s' % (collection, code)
        raise ServerError(msg)

    if fmt == 'xylose':
        jarticle = json.loads(article)
        article = Article(jarticle)

    logger.info('Document loaded: %s_%s', collection, code)
    return article
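# A minimal usage sketch for the retrieval method above, assuming it lives
# on an instantiated client wrapper; `cli`, the PID and the collection
# acronym are illustrative, not taken from the source.
cli = ArticleMetaClient()  # hypothetical wrapper exposing document()
article = cli.document('S0034-89102010000400007', collection='scl')
if article is not None:
    print(article.original_title())  # xylose Article accessor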
def test_should_return_none_if_no_document_dates(self):
    metadata = {
        "article": {"v65": [{"_": "19970300"}]},
    }
    article = Article(metadata)
    document_pubdate, __ = conversion.get_article_dates(article)
    self.assertIsNone(document_pubdate)
def test_should_return_creation_date_if_no_document_publication_date(self):
    metadata = {
        "article": {"v65": [{"_": "19970300"}], "v93": [{"_": "20000401"}]},
    }
    article = Article(metadata)
    document_pubdate, __ = conversion.get_article_dates(article)
    self.assertEqual(document_pubdate, ("2000", "04", "01"))
def test_should_return_document_publication_date_if_it_is_present(self):
    metadata = {
        "article": {"v65": [{"_": "19970300"}], "v223": [{"_": "20200124"}]},
    }
    article = Article(metadata)
    document_pubdate, __ = conversion.get_article_dates(article)
    self.assertEqual(document_pubdate, ("2020", "01", "24"))
def test_should_return_issue_publication_date_if_it_is_present(self):
    metadata = {
        "article": {"v65": [{"_": "19970300"}], "v223": [{"_": "20200124"}]},
    }
    article = Article(metadata)
    __, issue_pubdate = conversion.get_article_dates(article)
    self.assertEqual(issue_pubdate, ("1997", "03", ""))
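# A minimal sketch (not the real `conversion` module) of the precedence the
# four tests above assert: v223 (document publication date) wins, then v93
# (creation date), while v65 only feeds the issue date. The helper name is
# hypothetical; the field layout comes from the test fixtures.
def sketch_document_pubdate(metadata):
    for field in ('v223', 'v93'):
        value = metadata['article'].get(field)
        if value:
            raw = value[0]['_']  # ISIS-style dates are packed as YYYYMMDD
            return (raw[0:4], raw[4:6], raw[6:8])
    return None  # only v65 present: no document-level date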
def query_by_pid(coll, pid):
    article = load_article(coll, pid)

    if not article:
        return None

    title_keys = load_article_title_keys(article)

    query = coll.find(
        {'citations_keys': {'$in': title_keys}},
        {'article': 1, 'title': 1, 'collection': 1}
    )

    citations = None
    if query:
        citations = []
        for doc in query:
            citation = Article(doc)
            meta = load_document_meta(citation)
            citations.append(meta)

    article_meta = load_document_meta(article)

    return {'article': article_meta, 'cited_by': citations}
def query_by_meta(coll, title='', author='', year=''):
    article_meta = {
        'title': title,
        'author': author,
        'year': year
    }

    title_key = preparing_key(title, author, year)

    if not title_key:
        return None

    query = coll.find(
        {'citations_keys': title_key},
        {'article': 1, 'title': 1, 'collection': 1}
    )

    citations = None
    if query:
        citations = []
        for doc in query:
            citation = Article(doc)
            meta = load_document_meta(citation)
            citations.append(meta)

    return {'article': article_meta, 'cited_by': citations}
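# Illustrative call of the query helpers above; `coll` is assumed to be a
# PyMongo collection carrying the precomputed `citations_keys` field, and
# the metadata values are made up for the example.
result = query_by_meta(coll, title='Dengue', author='Silva', year='2010')
if result is not None:
    for cited in result['cited_by']:
        print(cited)  # one metadata dict per citing document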
def test_xml_start_page_pipe(self):
    fakexylosearticle = Article({
        'article': {
            "v14": [{"l": "649", "_": "", "f": "639"}]
        },
        'title': {}
    })

    pxml = ET.Element('doc')

    data = [fakexylosearticle, pxml]

    xmlarticle = pipeline_xml.StartPage()
    raw, xml = xmlarticle.transform(data)

    # v14/'f' carries the first page, so the pipe must add the field; the
    # original XPath (field[name=...]) matched a child element instead of
    # the attribute and could never find it.
    result = xml.find('./field[@name="start_page"]').text

    self.assertEqual(u'639', result)
def pipeline_sci(self):
    xylose_article = Article(self._article)

    ppl = plumber.Pipeline(
        export_sci.SetupArticlePipe(),
        export_sci.XMLArticlePipe(),
        export_sci.XMLFrontPipe(),
        export_sci.XMLJournalMetaJournalIdPipe(),
        export_sci.XMLJournalMetaJournalTitleGroupPipe(),
        export_sci.XMLJournalMetaISSNPipe(),
        export_sci.XMLJournalMetaCollectionPipe(),
        export_sci.XMLJournalMetaPublisherPipe(),
        export_sci.XMLArticleMetaUniqueArticleIdPipe(),
        export_sci.XMLArticleMetaArticleIdPublisherPipe(),
        export_sci.XMLArticleMetaArticleIdDOIPipe(),
        export_sci.XMLArticleMetaArticleCategoriesPipe(),
        export_sci.XMLArticleMetaTitleGroupPipe(),
        export_sci.XMLArticleMetaTranslatedTitleGroupPipe(),
        export_sci.XMLArticleMetaContribGroupPipe(),
        export_sci.XMLArticleMetaAffiliationPipe(),
        export_sci.XMLArticleMetaDatesInfoPipe(),
        export_sci.XMLArticleMetaIssueInfoPipe(),
        export_sci.XMLArticleMetaElocationInfoPipe(),
        export_sci.XMLArticleMetaPagesInfoPipe(),
        export_sci.XMLArticleMetaPermissionPipe(),
        export_sci.XMLArticleMetaURLsPipe(),
        export_sci.XMLArticleMetaAbstractsPipe(),
        export_sci.XMLArticleMetaKeywordsPipe(),
        export_sci.XMLArticleMetaCitationsPipe(),
        export_sci.XMLClosePipe()
    )

    transformed_data = ppl.run(xylose_article, rewrap=True)

    return next(transformed_data)
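# Sketch of driving one of these exporter pipelines; the wrapper class that
# holds `self._article` is not shown in the source, so the names here are
# hypothetical.
exporter = XMLExporter(raw_document)  # hypothetical wrapper, raw AM record
sci_xml = exporter.pipeline_sci()     # the rewrapped result of ppl.run()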
def load_documents(collection, articlemeta_db, all_records=False):
    """
    Load the documents from the ArticleMeta MongoDB database.
    """
    fltr = {
        'collection': collection
    }

    if all_records is False:
        fltr['fulltexts'] = {'$exists': 0}

    documents = articlemeta_db['articles'].find(
        fltr,
        {'code': 1},
        no_cursor_timeout=True
    )

    pids = []
    for document in documents:
        pids.append(document['code'])

    if 'fulltexts' in fltr:
        del fltr['fulltexts']

    for pid in pids:
        fltr['code'] = pid
        document = articlemeta_db['articles'].find_one(
            fltr,
            {'_id': 0, 'citations': 0}
        )
        yield Article(document)

    documents.close()
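# Consuming the generator above; the collection acronym and the database
# handle are illustrative.
for article in load_documents('scl', articlemeta_db, all_records=False):
    print(article.publisher_id)  # a xylose Article built from each record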
def document(self, code, collection, replace_journal_metadata=True,
             fmt='xylose', body=False):
    article = self.dispatcher(
        'get_article',
        code=code,
        collection=collection,
        replace_journal_metadata=replace_journal_metadata,
        fmt=fmt,
        body=body
    )

    if not article:
        logger.info('Document not found for: %s_%s', collection, code)
        return None

    if fmt in ['xylose', 'opac']:
        try:
            jarticle = json.loads(article)
        except ValueError:
            msg = 'Fail to load JSON when retrieving document: %s_%s' % (
                collection, code)
            raise ValueError(msg)

        article = Article(jarticle)

    logger.info('Document loaded: %s_%s', collection, code)
    return article
def pipeline_crossref(self):
    xylose_article = Article(self._article)

    ppl = plumber.Pipeline(
        export_crossref.SetupDoiBatchPipe(),
        export_crossref.XMLHeadPipe(),
        export_crossref.XMLBodyPipe(),
        export_crossref.XMLDoiBatchIDPipe(),
        export_crossref.XMLTimeStampPipe(),
        export_crossref.XMLDepositorPipe(),
        export_crossref.XMLRegistrantPipe(),
        export_crossref.XMLJournalPipe(),
        export_crossref.XMLJournalMetadataPipe(),
        export_crossref.XMLJournalTitlePipe(),
        export_crossref.XMLAbbreviatedJournalTitlePipe(),
        export_crossref.XMLISSNPipe(),
        export_crossref.XMLJournalIssuePipe(),
        export_crossref.XMLPubDatePipe(),
        export_crossref.XMLVolumePipe(),
        export_crossref.XMLIssuePipe(),
        export_crossref.XMLJournalArticlePipe(),
        export_crossref.XMLArticleTitlesPipe(),
        export_crossref.XMLArticleTitlePipe(),
        export_crossref.XMLArticleContributorsPipe(),
        export_crossref.XMLArticleAbstractPipe(),
        export_crossref.XMLArticlePubDatePipe(),
        export_crossref.XMLPagesPipe(),
        export_crossref.XMLPIDPipe(),
        export_crossref.XMLDOIDataPipe(),
        export_crossref.XMLClosePipe()
    )

    transformed_data = ppl.run(xylose_article, rewrap=True)

    return next(transformed_data)
def pipeline_doaj(self):
    xylose_article = Article(self._article, iso_format='iso 639-2')

    ppl = plumber.Pipeline(
        export_doaj.SetupArticlePipe(),
        export_doaj.XMLArticlePipe(),
        export_doaj.XMLJournalMetaPublisherPipe(),
        export_doaj.XMLJournalMetaJournalTitlePipe(),
        export_doaj.XMLJournalMetaISSNPipe(),
        export_doaj.XMLArticleMetaPublicationDatePipe(),
        export_doaj.XMLArticleMetaVolumePipe(),
        export_doaj.XMLArticleMetaIssuePipe(),
        export_doaj.XMLArticleMetaStartPagePipe(),
        export_doaj.XMLArticleMetaEndPagePipe(),
        export_doaj.XMLArticleMetaArticleIdDOIPipe(),
        export_doaj.XMLArticleMetaIdPipe(),
        export_doaj.XMLArticleMetaDocumentTypePipe(),
        export_doaj.XMLArticleMetaTitlePipe(),
        export_doaj.XMLArticleMetaAuthorsPipe(),
        export_doaj.XMLArticleMetaAffiliationPipe(),
        export_doaj.XMLArticleMetaAbstractsPipe(),
        export_doaj.XMLArticleMetaFullTextUrlPipe(),
        export_doaj.XMLArticleMetaKeywordsPipe(),
        export_doaj.XMLClosePipe()
    )

    transformed_data = ppl.run(xylose_article, rewrap=True)

    return next(transformed_data)
def test_xml_document_multiple_wok_subject_categories_pipe(self):
    fakexylosearticle = Article({
        'article': {},
        'title': {
            'v854': [{'_': 'Cat 1'}, {'_': 'Cat 2'}]
        }
    })

    pxml = ET.Element('doc')

    data = [fakexylosearticle, pxml]

    xmlarticle = pipeline_xml.WOKSC()
    raw, xml = xmlarticle.transform(data)

    result = ', '.join([
        i.text
        for i in xml.findall('./field[@name="wok_subject_categories"]')
    ])

    self.assertEqual(u'Cat 1, Cat 2', result)
def test_xmlarticle_meta_contrib_group_author_without_xrefs_pipe(self):
    del self._raw_json['article']['v71']
    article_meta = Article(self._raw_json)

    pxml = ET.Element('article')
    pxml.append(ET.Element('front'))

    front = pxml.find('front')
    front.append(ET.Element('article-meta'))

    # Use the article rebuilt after the deletion; the original passed
    # self._article_meta, leaving the freshly built article_meta unused.
    data = [article_meta, pxml]

    xmlarticle = export_rsps.XMLArticleMetaContribGroupPipe()
    raw, xml = xmlarticle.transform(data)

    xrefs = [
        i.get('rid')
        for i in xml.findall(
            './front/article-meta/contrib-group/contrib/xref')
    ]

    self.assertEqual([
        u'aff01', u'aff01', u'aff01', u'aff01', u'aff01', u'aff01',
        u'aff02', u'aff01', u'aff02', u'aff01', u'aff03'
    ], xrefs)
def test_xml_article_body_without_data_pipe(self):
    fakexylosearticle = Article({
        'article': {'v40': [{'_': 'pt'}]},
        'title': {}
    })

    pxml = ET.Element('article')

    data = [fakexylosearticle, pxml]

    xmlarticle = export_rsps.XMLBodyPipe()
    raw, xml = xmlarticle.transform(data)

    # No body data was given, so the pipe must not create a <body> element:
    # find() returns None and .text raises AttributeError.
    try:
        xml.find('./body/p').text
    except AttributeError:
        self.assertTrue(True)
    else:
        self.assertTrue(False)
def test_xmlarticle_meta_article_categories_pipe(self):
    fakexylosearticle = Article({
        'article': {'v40': [{'_': 'pt'}]},
        'section': {u'pt': u'label pt', u'es': u'label es'}
    })

    pxml = ET.Element('article')
    pxml.append(ET.Element('front'))

    front = pxml.find('front')
    front.append(ET.Element('article-meta'))

    data = [fakexylosearticle, pxml]

    xmlarticle = export_rsps.XMLArticleMetaArticleCategoriesPipe()
    raw, xml = xmlarticle.transform(data)

    categories = xml.find(
        './front/article-meta/article-categories/'
        'subj-group[@subj-group-type="heading"]/subject'
    ).text

    self.assertEqual(u'label pt', categories)
def test_xml_article_meta_article_id_doi_without_data_pipe(self):
    fakexylosearticle = Article({'article': {}, 'title': {}})

    pxml = ET.Element('article')
    pxml.append(ET.Element('front'))

    front = pxml.find('front')
    front.append(ET.Element('article-meta'))

    data = [fakexylosearticle, pxml]

    xmlarticle = export_rsps.XMLArticleMetaArticleIdDOIPipe()
    raw, xml = xmlarticle.transform(data)

    # This try/except is a trick to test the expected result of the piped
    # XML: since the precond method doesn't raise an exception, we check
    # whether the preconditioned pipe was called or not.
    try:
        xml.find(
            './front/article-meta/article-id[@pub-id-type="doi"]').text
    except AttributeError:
        self.assertTrue(True)
    else:
        self.assertTrue(False)
def test_xml_article_body_pipe(self):
    # Renamed: the original duplicated test_xml_article_body_without_data_pipe
    # although this case does provide body data.
    fakexylosearticle = Article({
        'article': {'v40': [{'_': 'pt'}]},
        'title': {},
        'body': {'pt': 'body pt', 'es': 'body es'}
    })

    pxml = ET.Element('article')

    data = [fakexylosearticle, pxml]

    xmlarticle = export_rsps.XMLBodyPipe()
    raw, xml = xmlarticle.transform(data)

    body = xml.find('./body/p').text

    self.assertEqual('body pt', body)
def test_xml_journal_title_pipe(self):
    fakexylosearticle = Article({
        'article': {},
        'title': {
            "v100": [{"_": "Revista de Sa\u00fade P\u00fablica"}]
        }
    })

    pxml = ET.Element('doc')

    data = [fakexylosearticle, pxml]

    xmlarticle = pipeline_xml.JournalTitle()
    raw, xml = xmlarticle.transform(data)

    # v100 carries the journal title, so the pipe must add the field; the
    # original XPath (field[name=...]) matched a child element instead of
    # the attribute and could never find it.
    result = xml.find('./field[@name="journal"]').text

    self.assertEqual(u"Revista de Sa\u00fade P\u00fablica", result)
def test_xml_citation_date_with_year_and_month_and_day_pipe(self):
    fakexylosearticle = Article({
        'article': {},
        'title': {},
        'citations': [{'v65': [{'_': '20060430'}]}]
    }).citations[0]

    pxml = ET.Element('ref')
    pxml.append(ET.Element('element-citation'))

    data = [fakexylosearticle, pxml]

    raw, xml = self._xmlcitation.DatePipe().transform(data)

    found_year = xml.find('./element-citation/date/year').text
    found_month = xml.find('./element-citation/date/month').text
    found_day = xml.find('./element-citation/date/day').text

    self.assertEqual(u'2006', found_year)
    self.assertEqual(u'04', found_month)
    self.assertEqual(u'30', found_day)
def document(self, code, collection, replace_journal_metadata=True,
             fmt='xylose'):
    try:
        article = self.client.get_article(
            code=code,
            collection=collection,
            replace_journal_metadata=replace_journal_metadata,
            fmt=fmt
        )
    except self.ARTICLEMETA_THRIFT.ServerError:
        msg = 'Error retrieving document: %s_%s' % (collection, code)
        raise ServerError(msg)

    if not article:
        logger.warning('Document not found for: %s_%s', collection, code)
        return None

    if fmt == 'xylose':
        try:
            jarticle = json.loads(article)
        except ValueError:
            msg = 'Fail to load JSON when retrieving document: %s_%s' % (
                collection, code)
            raise ValueError(msg)

        article = Article(jarticle)

    logger.info('Document loaded: %s_%s', collection, code)
    return article
def test_xmlarticle_meta_translated_abstract_without_data_pipe(self):
    fakexylosearticle = Article({
        'article': {'v40': [{'_': 'pt'}]},
        'title': {}
    })

    pxml = ET.Element('article')
    pxml.append(ET.Element('front'))

    front = pxml.find('front')
    front.append(ET.Element('article-meta'))

    data = [fakexylosearticle, pxml]

    xmlarticle = export_rsps.XMLArticleMetaAbstractsPipe()
    raw, xml = xmlarticle.transform(data)

    abstract = xml.find('./front/article-meta/trans-abstract/p')

    self.assertIsNone(abstract)
def pipeline_pubmed(self):
    xylose_article = Article(self._article, iso_format='iso 639-2')

    ppl = plumber.Pipeline(
        export_pubmed.SetupArticleSetPipe(),
        export_pubmed.XMLArticlePipe(),
        export_pubmed.XMLJournalPipe(),
        export_pubmed.XMLPublisherNamePipe(),
        export_pubmed.XMLJournalTitlePipe(),
        export_pubmed.XMLISSNPipe(),
        export_pubmed.XMLVolumePipe(),
        export_pubmed.XMLIssuePipe(),
        export_pubmed.XMLPubDatePipe(),
        export_pubmed.XMLReplacesPipe(),
        export_pubmed.XMLArticleTitlePipe(),
        export_pubmed.XMLFirstPagePipe(),
        export_pubmed.XMLLastPagePipe(),
        export_pubmed.XMLElocationIDPipe(),
        export_pubmed.XMLLanguagePipe(),
        export_pubmed.XMLAuthorListPipe(),
        export_pubmed.XMLPublicationTypePipe(),
        export_pubmed.XMLArticleIDListPipe(),
        export_pubmed.XMLHistoryPipe(),
        export_pubmed.XMLAbstractPipe(),
        export_pubmed.XMLClosePipe()
    )

    transformed_data = ppl.run(xylose_article, rewrap=True)

    return next(transformed_data)
def pipeline_rsps(self):
    xylose_article = Article(self._article)

    ppl = plumber.Pipeline(
        export_rsps.SetupArticlePipe(),
        export_rsps.XMLArticlePipe(),
        export_rsps.XMLFrontPipe(),
        export_rsps.XMLJournalMetaJournalIdPipe(),
        export_rsps.XMLJournalMetaJournalTitleGroupPipe(),
        export_rsps.XMLJournalMetaISSNPipe(),
        export_rsps.XMLJournalMetaPublisherPipe(),
        export_rsps.XMLArticleMetaArticleIdPublisherPipe(),
        export_rsps.XMLArticleMetaArticleIdDOIPipe(),
        export_rsps.XMLArticleMetaArticleCategoriesPipe(),
        export_rsps.XMLArticleMetaTitleGroupPipe(),
        export_rsps.XMLArticleMetaTranslatedTitleGroupPipe(),
        export_rsps.XMLArticleMetaContribGroupPipe(),
        export_rsps.XMLArticleMetaAffiliationPipe(),
        export_rsps.XMLArticleMetaGeneralInfoPipe(),
        export_rsps.XMLArticleMetaHistoryPipe(),
        export_rsps.XMLArticleMetaPermissionPipe(),
        export_rsps.XMLArticleMetaAbstractsPipe(),
        export_rsps.XMLArticleMetaKeywordsPipe(),
        export_rsps.XMLArticleMetaCountsPipe(),
        export_rsps.XMLBodyPipe(),
        export_rsps.XMLArticleMetaCitationsPipe(),
        export_rsps.XMLSubArticlePipe(),
        export_rsps.XMLClosePipe()
    )

    transformed_data = ppl.run(xylose_article, rewrap=True)

    return next(transformed_data)
def test_xmlarticle_meta_keywords_without_data_pipe(self):
    fakexylosearticle = Article({
        'article': {'v40': [{'_': 'pt'}]},
        'title': {}
    })

    pxml = ET.Element('records')
    pxml.append(ET.Element('record'))

    data = [fakexylosearticle, pxml]

    xmlarticle = export_doaj.XMLArticleMetaKeywordsPipe()
    raw, xml = xmlarticle.transform(data)

    # No keywords were given, so find() returns None and .text raises
    # AttributeError, which is the expected outcome.
    try:
        xml.find('./record/keywords').text
    except AttributeError:
        self.assertTrue(True)
    else:
        self.assertTrue(False)
def test_xmlarticle_meta_general_info_fulltext_uri_without_data_pipe(self):
    fakexylosearticle = Article({
        'article': {'v65': [{'_': '201008'}]},
        'title': {}
    })

    pxml = ET.Element('records')
    pxml.append(ET.Element('record'))

    data = [fakexylosearticle, pxml]

    xmlarticle = export_doaj.XMLArticleMetaFullTextUrlPipe()
    raw, xml = xmlarticle.transform(data)

    try:
        xml.find('./record/issue').text
    except AttributeError:
        self.assertTrue(True)
    else:
        self.assertTrue(False)
def load_documents(collection, all_records=False):
    fltr = {
        'collection': collection
    }

    if all_records is False:
        fltr['body'] = {'$exists': 0}

    documents = articlemeta_db['articles'].find(
        fltr,
        {'code': 1},
    )

    pids = []
    for document in documents:
        pids.append(document['code'])

    # Drop the 'body' restriction only after the id scan: PyMongo sends the
    # filter on the first fetch, so mutating it earlier would alter the query.
    if 'body' in fltr:
        del fltr['body']

    for pid in pids:
        fltr['code'] = pid
        document = articlemeta_db['articles'].find_one(
            fltr,
            {'_id': 0, 'citations': 0}
        )
        yield Article(document)

    documents.close()
def load_documents(collection, all_records=False):
    fltr = {'collection': collection}

    if all_records is False:
        fltr['license'] = {'$exists': 0}

    documents = articlemeta_db['articles'].find(fltr, {'code': 1})

    pids = []
    for document in documents:
        pids.append(document['code'])

    if 'license' in fltr:
        del fltr['license']

    for pid in pids:
        fltr['code'] = pid
        document = articlemeta_db['articles'].find_one(
            fltr, {'_id': 0, 'citations': 0})
        yield Article(document)

    documents.close()  # release the cursor explicitly
def test_xmlarticle_meta_keywords_without_data_pipe(self):
    fakexylosearticle = Article({
        'article': {'v40': [{'_': 'pt'}]},
        'title': {}
    })

    pxml = ET.Element('article')
    pxml.append(ET.Element('front'))

    front = pxml.find('front')
    front.append(ET.Element('article-meta'))

    data = [fakexylosearticle, pxml]

    xmlarticle = export_rsps.XMLArticleMetaKeywordsPipe()
    raw, xml = xmlarticle.transform(data)

    keywords_language = xml.find('./front/article-meta/kwd-group')

    self.assertIsNone(keywords_language)
def _check_article_meta(self, metadata):
    """
    Check the given article metadata and return it enriched with
    identification, status and title-key fields.
    """
    article = Article(metadata)

    issns = set([article.any_issn(priority=u'electronic'),
                 article.any_issn(priority=u'print')])

    metadata['code_issue'] = article.publisher_id[1:18]
    metadata['code_title'] = list(issns)
    metadata['collection'] = article.collection_acronym
    metadata['document_type'] = article.document_type
    metadata['publication_year'] = article.publication_date[0:4]
    metadata['validated_scielo'] = 'False'
    metadata['validated_wos'] = 'False'
    metadata['sent_wos'] = 'False'
    metadata['sent_doaj'] = 'False'
    metadata['applicable'] = 'False'

    try:
        metadata['processing_date'] = article.processing_date
    except Exception:
        # Fall back to today only when the publication date lies in the
        # future; otherwise processing_date stays unset.
        if article.publication_date > datetime.now().date().isoformat():
            metadata['processing_date'] = datetime.now().date().isoformat()

    gtk = gen_title_keys(article)
    if gtk:
        metadata.update(gtk)

    gctk = gen_citations_title_keys(article)
    if gctk:
        metadata.update(gctk)

    return metadata
def get_solr_args_from_article(document, indexed_date):
    article = Article(document)

    original_title = article.original_title()

    try:
        # publication_date may be formatted as yyyy-mm-dd
        publication_date = datetime.strptime(
            article.publication_date, '%Y-%m-%d').isoformat()
    except ValueError:
        try:
            # publication_date may be formatted as yyyy-mm
            publication_date = datetime.strptime(
                "{0}-01".format(article.publication_date),
                '%Y-%m-%d').isoformat()
        except ValueError:
            # publication_date may be formatted as yyyy
            publication_date = datetime.strptime(
                "{0}-01-01".format(article.publication_date),
                '%Y-%m-%d').isoformat()

    languages = [l for l in article.languages()]

    authors = []
    if article.authors is not None:
        for author in article.authors:
            author_name = u"{0} {1}".format(
                author["given_names"], author["surname"])
            authors.append(remove_control_chars(author_name))

    article_first_author = article.first_author
    if article_first_author is not None:
        first_author = remove_control_chars(u"{0} {1}".format(
            article_first_author["given_names"],
            article_first_author["surname"]))
    else:
        first_author = ""

    # Start - insert categories and magazines
    magazine_name = remove_control_chars(u"{0}".format(article.journal.title))
    magazine_issn = article.journal.scielo_issn
    magazine_abbreviated_title = remove_control_chars(
        article.journal.abbreviated_title)
    magazine_domain = article.scielo_domain
    magazine_acronym = article.journal.acronym

    try:
        magazine = Magazine.objects.get(magazine_name=magazine_name)
    except Magazine.DoesNotExist:
        magazine = Magazine.objects.create(
            magazine_name=magazine_name,
            magazine_abbreviated_title=magazine_abbreviated_title,
            magazine_issn=magazine_issn,
            magazine_domain=magazine_domain,
            magazine_acronym=magazine_acronym)
        magazine.save()

    category_ids = []
    if article.journal.subject_areas is not None:
        for item_category in article.journal.subject_areas:
            category_name = remove_control_chars(
                u"{0}".format(item_category)).title()

            try:
                category = Category.objects.get(
                    category_name_en=category_name)
            except Category.DoesNotExist:
                category = Category.objects.create(
                    category_name_en=category_name)
                category.save()

            category_ids.append(category.id)

            already_related = any(
                category_loop.category_name_en == category_name
                for category_loop in magazine.categories.all()
            )
            if not already_related:
                magazine.categories.add(category)
                magazine.save()
    # End - insert categories and magazines

    args = {
        "id": u"{0}{1}".format(article.publisher_id,
                               article.collection_acronym),
        "any_issn": article.journal.any_issn(),
        "journal_title": remove_control_chars(article.journal.title),
        "journal_id": magazine.id,
        "journal_volume": article.volume,
        "journal_number": article.issue,
        "original_title": remove_control_chars(original_title),
        "original_abstract": remove_control_chars(
            article.original_abstract()),
        "publication_date": "{0}Z".format(publication_date),
        "subject_areas": article.journal.subject_areas,
        "subject_areas_ids": category_ids,
        "wos_subject_areas": article.journal.wos_subject_areas,
        "original_language": article.original_language(),
        "languages": languages,
        "document_type": article.document_type,
        "authors": authors,
        "first_author": first_author,
        "corporative_authors": article.corporative_authors,
        "publisher_id": article.publisher_id,
        "collection_acronym": article.collection_acronym,
        "indexed_date": indexed_date
    }

    # Add the cover when reindexing or updating.
    try:
        cover_article = CoverArticle.objects.get(article_id=args[u"id"])
        args[u"image_upload_path"] = cover_article.image
        args[u"image_upload_date"] = cover_article.upload_time
        args[u"image_uploader"] = cover_article.administrator.name
    except CoverArticle.DoesNotExist:
        pass

    article_translated_abstracts = article.translated_abstracts()
    if article_translated_abstracts is not None:
        for language in article_translated_abstracts:
            args[u"translated_abstracts_{0}".format(language)] = \
                remove_control_chars(article_translated_abstracts[language])

    article_translated_titles = article.translated_titles()
    if article_translated_titles is not None:
        for language in article_translated_titles:
            args[u"translated_titles_{0}".format(language)] = \
                remove_control_chars(article_translated_titles[language])

    article_keywords = article.keywords()
    if article_keywords is not None:
        for language in article_keywords:
            keywords = [remove_control_chars(k)
                        for k in article_keywords[language]]
            args[u"keywords_{0}".format(language)] = keywords

    return args
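# Hedged sketch of feeding the assembled args into Solr via pysolr; the
# Solr URL and core name are assumptions, and `document` stands for a raw
# articlemeta record as used throughout this module.
import pysolr
from datetime import datetime

solr = pysolr.Solr('http://localhost:8983/solr/articles')  # assumed core
args = get_solr_args_from_article(document, datetime.now().isoformat())
solr.add([args])  # pysolr accepts a list of field dicts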