Example 1
    def _check_journal_meta(self, metadata):
        """
            This method will check the given metadata and retrieve
            a new dictionary with some new fields.
        """
        journal = Article({'title': metadata, 'article': {}, 'citations': {}})

        issns = set([journal.any_issn(priority=u'electronic'),
                     journal.any_issn(priority=u'print')])

        metadata['code'] = list(issns)
        metadata['collection'] = journal.collection_acronym

        return metadata
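
A minimal sketch (values are hypothetical, not from the source) of why the method above collects the two any_issn() results in a set: when the electronic and print lookups resolve to the same ISSN, the set collapses them into a single code.

# Hypothetical values standing in for journal.any_issn(priority=u'electronic')
# and journal.any_issn(priority=u'print'); both resolve to the same ISSN here.
electronic_issn = u'0034-8910'
print_issn = u'0034-8910'

issns = set([electronic_issn, print_issn])
assert list(issns) == [u'0034-8910']  # duplicates collapse into one entry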
Example 2
    def document(self,
                 code,
                 collection=None,
                 replace_journal_metadata=True,
                 fmt='xylose'):

        query = {
            'code': code,
            'replace_journal_metadata': replace_journal_metadata,
            'fmt': fmt
        }

        if collection:
            query['collection'] = collection

        try:
            article = self.client.get_article(**query)
        except Exception:
            msg = 'Error retrieving document: %s_%s' % (collection, code)
            raise ServerError(msg)

        if fmt == 'xylose':
            jarticle = json.loads(article)
            xarticle = Article(jarticle)
            logger.info('Document loaded: %s_%s' % (collection, code))
            return xarticle
        else:
            logger.info('Document loaded: %s_%s' % (collection, code))
            return article
Example 3
 def test_should_return_none_if_no_document_dates(self):
     metadata = {
         "article": {"v65": [{"_": "19970300"}],},
     }
     article = Article(metadata)
     document_pubdate, __ = conversion.get_article_dates(article)
     self.assertIsNone(document_pubdate)
Example 4
 def test_should_return_creation_date_if_no_document_publication_date(self):
     metadata = {
         "article": {"v65": [{"_": "19970300"}], "v93": [{"_": "20000401"}],},
     }
     article = Article(metadata)
     document_pubdate, __ = conversion.get_article_dates(article)
     self.assertEqual(document_pubdate, ("2000", "04", "01"))
Example 5
 def test_should_return_document_publication_date_if_it_is_presente(self):
     metadata = {
         "article": {"v65": [{"_": "19970300"}], "v223": [{"_": "20200124"}],},
     }
     article = Article(metadata)
     document_pubdate, __ = conversion.get_article_dates(article)
     self.assertEqual(document_pubdate, ("2020", "01", "24"))
Example 6
 def test_should_return_issue_publication_date_if_it_is_presente(self):
     metadata = {
         "article": {"v65": [{"_": "19970300"}], "v223": [{"_": "20200124"}],},
     }
     article = Article(metadata)
     __, issue_pubdate = conversion.get_article_dates(article)
     self.assertEqual(issue_pubdate, ("1997", "03", ""))
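
The four tests above exercise the precedence of the date fields. Below is a minimal sketch (not from the source) combining all three fields in one metadata dictionary; it assumes the same imports as the tests (Article and the conversion module) and that, as the test names imply, conversion.get_article_dates() prefers v223 over v93 for the document date and keeps v65 as the issue date.

# Sketch of the precedence inferred from the tests above.
metadata = {
    "article": {
        "v65": [{"_": "19970300"}],   # issue publication date
        "v93": [{"_": "20000401"}],   # document creation date
        "v223": [{"_": "20200124"}],  # document publication date
    },
}
article = Article(metadata)
document_pubdate, issue_pubdate = conversion.get_article_dates(article)
# Expected by inference from the tests above:
#   document_pubdate == ("2020", "01", "24")
#   issue_pubdate == ("1997", "03", "")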
Example 7
def query_by_pid(coll, pid):

    article = load_article(coll, pid)

    if not article:
        return None

    title_keys = load_article_title_keys(article)
    query = coll.find({'citations_keys': {
        '$in': title_keys
    }}, {
        'article': 1,
        'title': 1,
        'collection': 1
    })

    citations = None
    if query:
        citations = []
        for doc in query:
            citation = Article(doc)
            meta = load_document_meta(citation)
            citations.append(meta)

    article_meta = load_document_meta(article)

    return {'article': article_meta, 'cited_by': citations}
Example 8
def query_by_meta(coll, title='', author='', year=''):

    article_meta = {}
    article_meta['title'] = title
    article_meta['author'] = author
    article_meta['year'] = year

    title_key = preparing_key(title, author, year)

    if not title_key:
        return None

    query = coll.find({'citations_keys': title_key}, {
        'article': 1,
        'title': 1,
        'collection': 1
    })

    citations = None
    if query:
        citations = []
        for doc in query:
            citation = Article(doc)
            meta = load_document_meta(citation)
            citations.append(meta)

    return {'article': article_meta, 'cited_by': citations}
Example 9
    def test_xml_start_page_pipe(self):

        fakexylosearticle = Article({
            'article': {
                "v14": [{
                    "l": "649",
                    "_": "",
                    "f": "639"
                }]
            },
            'title': {}
        })

        pxml = ET.Element('doc')

        data = [fakexylosearticle, pxml]

        xmlarticle = pipeline_xml.StartPage()

        raw, xml = xmlarticle.transform(data)

        # This try/except is a trick to test the expected result of the piped
        # XML: since the precond method does not raise an exception, we check
        # whether the preconditioned pipe was called or not.
        try:
            xml.find('./field[name="start_page"]').text
        except AttributeError:
            self.assertTrue(True)
        else:
            self.assertTrue(False)
Example 10
    def pipeline_sci(self):
        xylose_article = Article(self._article)

        ppl = plumber.Pipeline(
            export_sci.SetupArticlePipe(), export_sci.XMLArticlePipe(),
            export_sci.XMLFrontPipe(),
            export_sci.XMLJournalMetaJournalIdPipe(),
            export_sci.XMLJournalMetaJournalTitleGroupPipe(),
            export_sci.XMLJournalMetaISSNPipe(),
            export_sci.XMLJournalMetaCollectionPipe(),
            export_sci.XMLJournalMetaPublisherPipe(),
            export_sci.XMLArticleMetaUniqueArticleIdPipe(),
            export_sci.XMLArticleMetaArticleIdPublisherPipe(),
            export_sci.XMLArticleMetaArticleIdDOIPipe(),
            export_sci.XMLArticleMetaArticleCategoriesPipe(),
            export_sci.XMLArticleMetaTitleGroupPipe(),
            export_sci.XMLArticleMetaTranslatedTitleGroupPipe(),
            export_sci.XMLArticleMetaContribGroupPipe(),
            export_sci.XMLArticleMetaAffiliationPipe(),
            export_sci.XMLArticleMetaDatesInfoPipe(),
            export_sci.XMLArticleMetaIssueInfoPipe(),
            export_sci.XMLArticleMetaElocationInfoPipe(),
            export_sci.XMLArticleMetaPagesInfoPipe(),
            export_sci.XMLArticleMetaPermissionPipe(),
            export_sci.XMLArticleMetaURLsPipe(),
            export_sci.XMLArticleMetaAbstractsPipe(),
            export_sci.XMLArticleMetaKeywordsPipe(),
            export_sci.XMLArticleMetaCitationsPipe(),
            export_sci.XMLClosePipe())

        transformed_data = ppl.run(xylose_article, rewrap=True)

        return next(transformed_data)
Example 11
def load_documents(collection, articlemeta_db, all_records=False):
    """
    Load the documents from the AM MongoDB database.
    """
    fltr = {
        'collection': collection
    }

    if all_records is False:
        fltr['fulltexts'] = {'$exists': 0}

    documents = articlemeta_db['articles'].find(
        fltr,
        {'code': 1}, no_cursor_timeout=True
    )

    pids = []

    for document in documents:
        pids.append(document['code'])

    if 'fulltexts' in fltr:
        del(fltr['fulltexts'])

    for pid in pids:

        fltr['code'] = pid
        document = articlemeta_db['articles'].find_one(
            fltr,
            {'_id': 0, 'citations': 0}
        )

        yield Article(document)

    documents.close()
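
A possible way to consume the generator above (hypothetical values: 'scl' as a collection acronym and articlemeta_db as an already-open pymongo database handle), iterating lazily over the wrapped Article instances:

# Hypothetical usage; collection acronym and database handle are assumed.
for article in load_documents('scl', articlemeta_db, all_records=True):
    print(article.publisher_id)  # PID exposed by the xylose Article wrapper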
Example 12
    def document(self,
                 code,
                 collection,
                 replace_journal_metadata=True,
                 fmt='xylose',
                 body=False):

        article = self.dispatcher('get_article',
                                  code=code,
                                  collection=collection,
                                  replace_journal_metadata=True,
                                  fmt=fmt,
                                  body=body)

        if not article:
            logger.info('Document not found for: %s_%s', collection, code)
            return None

        if fmt in ['xylose', 'opac']:
            jarticle = None
            try:
                jarticle = json.loads(article)
            except Exception:
                msg = 'Failed to load JSON when retrieving document: %s_%s' % (
                    collection, code)
                raise ValueError(msg)

            xarticle = Article(jarticle)
            logger.info('Document loaded: %s_%s', collection, code)

            return xarticle

        logger.info('Document loaded: %s_%s', collection, code)
        return article
Example 13
    def pipeline_crossref(self):
        xylose_article = Article(self._article)

        ppl = plumber.Pipeline(
            export_crossref.SetupDoiBatchPipe(),
            export_crossref.XMLHeadPipe(),
            export_crossref.XMLBodyPipe(),
            export_crossref.XMLDoiBatchIDPipe(),
            export_crossref.XMLTimeStampPipe(),
            export_crossref.XMLDepositorPipe(),
            export_crossref.XMLRegistrantPipe(),
            export_crossref.XMLJournalPipe(),
            export_crossref.XMLJournalMetadataPipe(),
            export_crossref.XMLJournalTitlePipe(),
            export_crossref.XMLAbbreviatedJournalTitlePipe(),
            export_crossref.XMLISSNPipe(),
            export_crossref.XMLJournalIssuePipe(),
            export_crossref.XMLPubDatePipe(),
            export_crossref.XMLVolumePipe(),
            export_crossref.XMLIssuePipe(),
            export_crossref.XMLJournalArticlePipe(),
            export_crossref.XMLArticleTitlesPipe(),
            export_crossref.XMLArticleTitlePipe(),
            export_crossref.XMLArticleContributorsPipe(),
            export_crossref.XMLArticleAbstractPipe(),
            export_crossref.XMLArticlePubDatePipe(),
            export_crossref.XMLPagesPipe(),
            export_crossref.XMLPIDPipe(),
            export_crossref.XMLDOIDataPipe(),
            export_crossref.XMLClosePipe()
        )

        transformed_data = ppl.run(xylose_article, rewrap=True)

        return next(transformed_data)
Example 14
    def pipeline_doaj(self):
        xylose_article = Article(self._article, iso_format='iso 639-2')

        ppl = plumber.Pipeline(export_doaj.SetupArticlePipe(),
                               export_doaj.XMLArticlePipe(),
                               export_doaj.XMLJournalMetaPublisherPipe(),
                               export_doaj.XMLJournalMetaJournalTitlePipe(),
                               export_doaj.XMLJournalMetaISSNPipe(),
                               export_doaj.XMLArticleMetaPublicationDatePipe(),
                               export_doaj.XMLArticleMetaVolumePipe(),
                               export_doaj.XMLArticleMetaIssuePipe(),
                               export_doaj.XMLArticleMetaStartPagePipe(),
                               export_doaj.XMLArticleMetaEndPagePipe(),
                               export_doaj.XMLArticleMetaArticleIdDOIPipe(),
                               export_doaj.XMLArticleMetaIdPipe(),
                               export_doaj.XMLArticleMetaDocumentTypePipe(),
                               export_doaj.XMLArticleMetaTitlePipe(),
                               export_doaj.XMLArticleMetaAuthorsPipe(),
                               export_doaj.XMLArticleMetaAffiliationPipe(),
                               export_doaj.XMLArticleMetaAbstractsPipe(),
                               export_doaj.XMLArticleMetaFullTextUrlPipe(),
                               export_doaj.XMLArticleMetaKeywordsPipe(),
                               export_doaj.XMLClosePipe())

        transformed_data = ppl.run(xylose_article, rewrap=True)

        return next(transformed_data)
Example 15
    def test_xml_document_multiple_wok_subject_categories_pipe(self):

        fakexylosearticle = Article({
            'article': {},
            'title': {
                'v854': [{
                    '_': 'Cat 1'
                }, {
                    '_': 'Cat 2'
                }]
            }
        })

        pxml = ET.Element('doc')

        data = [fakexylosearticle, pxml]

        xmlarticle = pipeline_xml.WOKSC()
        raw, xml = xmlarticle.transform(data)

        result = ', '.join([
            i.text
            for i in xml.findall('./field[@name="wok_subject_categories"]')
        ])

        self.assertEqual(u'Cat 1, Cat 2', result)
Example 16
    def test_xmlarticle_meta_contrib_group_author_without_xrefs_pipe(self):

        del (self._raw_json['article']['v71'])
        article_meta = Article(self._raw_json)

        pxml = ET.Element('article')
        pxml.append(ET.Element('front'))

        front = pxml.find('front')
        front.append(ET.Element('article-meta'))

        data = [self._article_meta, pxml]

        xmlarticle = export_rsps.XMLArticleMetaContribGroupPipe()
        raw, xml = xmlarticle.transform(data)

        fullnames = [
            i.get('rid') for i in xml.findall(
                './front/article-meta/contrib-group/contrib/xref')
        ]

        self.assertEqual([
            u'aff01', u'aff01', u'aff01', u'aff01', u'aff01', u'aff01',
            u'aff02', u'aff01', u'aff02', u'aff01', u'aff03'
        ], fullnames)
Example 17
    def test_xml_article_body_without_data_pipe(self):

        fakexylosearticle = Article({
            'article': {
                'v40': [{
                    '_': 'pt'
                }]
            },
            'title': {}
        })

        pxml = ET.Element('article')

        data = [fakexylosearticle, pxml]

        xmlarticle = export_rsps.XMLBodyPipe()

        raw, xml = xmlarticle.transform(data)

        try:
            xml.find('./body/p').text
        except AttributeError:
            self.assertTrue(True)
        else:
            self.assertTrue(False)
Example 18
    def test_xmlarticle_meta_article_categories_pipe(self):

        fakexylosearticle = Article({
            'article': {
                'v40': [{
                    '_': 'pt'
                }]
            },
            'section': {
                u'pt': u'label pt',
                u'es': u'label es'
            }
        })

        pxml = ET.Element('article')
        pxml.append(ET.Element('front'))

        front = pxml.find('front')
        front.append(ET.Element('article-meta'))

        data = [fakexylosearticle, pxml]

        xmlarticle = export_rsps.XMLArticleMetaArticleCategoriesPipe()
        raw, xml = xmlarticle.transform(data)

        categories = xml.find(
            './front/article-meta/article-categories/subj-group[@subj-group-type="heading"]/subject'
        ).text

        self.assertEqual(u'label pt', categories)
Example 19
    def test_xml_article_meta_article_id_doi_without_data_pipe(self):

        fakexylosearticle = Article({'article': {}, 'title': {}})

        pxml = ET.Element('article')
        pxml.append(ET.Element('front'))

        front = pxml.find('front')
        front.append(ET.Element('article-meta'))

        data = [fakexylosearticle, pxml]

        xmlarticle = export_rsps.XMLArticleMetaArticleIdDOIPipe()

        raw, xml = xmlarticle.transform(data)

        # This try/except is a trick to test the expected result of the piped
        # XML: since the precond method does not raise an exception, we check
        # whether the preconditioned pipe was called or not.
        try:
            xml.find(
                './front/article-meta/article-id[@pub-id-type="doi"]').text
        except AttributeError:
            self.assertTrue(True)
        else:
            self.assertTrue(False)
Example 20
    def test_xml_article_body_without_data_pipe(self):

        fakexylosearticle = Article({
            'article': {
                'v40': [{
                    '_': 'pt'
                }]
            },
            'title': {},
            'body': {
                'pt': 'body pt',
                'es': 'body es'
            }
        })

        pxml = ET.Element('article')

        data = [fakexylosearticle, pxml]

        xmlarticle = export_rsps.XMLBodyPipe()

        raw, xml = xmlarticle.transform(data)

        body = xml.find('./body/p').text

        self.assertEqual('body pt', body)
Example 21
    def test_xml_journal_title_pipe(self):

        fakexylosearticle = Article({
            'article': {},
            'title': {
                "v100": [{
                    "_": "Revista de Sa\u00fade P\u00fablica"
                }]
            }
        })

        pxml = ET.Element('doc')

        data = [fakexylosearticle, pxml]

        xmlarticle = pipeline_xml.JournalTitle()

        raw, xml = xmlarticle.transform(data)

        # This try/except is a trick to test the expected result of the piped
        # XML: since the precond method does not raise an exception, we check
        # whether the preconditioned pipe was called or not.
        try:
            xml.find('./field[name="journal"]').text
        except AttributeError:
            self.assertTrue(True)
        else:
            self.assertTrue(False)
Example 22
    def test_xml_citation_date_with_year_and_month_and_day_pipe(self):

        fakexylosearticle = Article({
            'article': {},
            'title': {},
            'citations': [{
                'v65': [{
                    '_': '20060430'
                }]
            }]
        }).citations[0]

        pxml = ET.Element('ref')
        pxml.append(ET.Element('element-citation'))

        data = [fakexylosearticle, pxml]

        raw, xml = self._xmlcitation.DatePipe().transform(data)

        expected_year = xml.find('./element-citation/date/year').text
        expected_month = xml.find('./element-citation/date/month').text
        expected_day = xml.find('./element-citation/date/day').text

        self.assertEqual(u'2006', expected_year)
        self.assertEqual(u'04', expected_month)
        self.assertEqual(u'30', expected_day)
Example 23
    def document(self,
                 code,
                 collection,
                 replace_journal_metadata=True,
                 fmt='xylose'):
        try:
            article = self.client.get_article(code=code,
                                              collection=collection,
                                              replace_journal_metadata=True,
                                              fmt=fmt)
        except self.ARTICLEMETA_THRIFT.ServerError as e:
            msg = 'Error retrieving document: %s_%s' % (collection, code)
            raise ServerError(msg)

        if not article:
            logger.warning('Document not found for: %s_%s' %
                           (collection, code))
            return None

        if fmt == 'xylose':
            jarticle = None
            try:
                jarticle = json.loads(article)
            except Exception:
                msg = 'Failed to load JSON when retrieving document: %s_%s' % (
                    collection, code)
                raise ValueError(msg)

            xarticle = Article(jarticle)
            logger.info('Document loaded: %s_%s' % (collection, code))

            return xarticle

        logger.info('Document loaded: %s_%s' % (collection, code))
        return article
Example 24
    def test_xmlarticle_meta_translated_abstract_without_data_pipe(self):

        fakexylosearticle = Article({
            'article': {
                'v40': [{
                    '_': 'pt'
                }]
            },
            'title': {}
        })

        pxml = ET.Element('article')
        pxml.append(ET.Element('front'))

        front = pxml.find('front')
        front.append(ET.Element('article-meta'))

        data = [fakexylosearticle, pxml]

        xmlarticle = export_rsps.XMLArticleMetaAbstractsPipe()
        raw, xml = xmlarticle.transform(data)

        abstract = xml.find('./front/article-meta/trans-abstract/p')

        self.assertEqual(None, abstract)
Example 25
    def pipeline_pubmed(self):
        xylose_article = Article(self._article, iso_format='iso 639-2')

        ppl = plumber.Pipeline(export_pubmed.SetupArticleSetPipe(),
                               export_pubmed.XMLArticlePipe(),
                               export_pubmed.XMLJournalPipe(),
                               export_pubmed.XMLPublisherNamePipe(),
                               export_pubmed.XMLJournalTitlePipe(),
                               export_pubmed.XMLISSNPipe(),
                               export_pubmed.XMLVolumePipe(),
                               export_pubmed.XMLIssuePipe(),
                               export_pubmed.XMLPubDatePipe(),
                               export_pubmed.XMLReplacesPipe(),
                               export_pubmed.XMLArticleTitlePipe(),
                               export_pubmed.XMLFirstPagePipe(),
                               export_pubmed.XMLLastPagePipe(),
                               export_pubmed.XMLElocationIDPipe(),
                               export_pubmed.XMLLanguagePipe(),
                               export_pubmed.XMLAuthorListPipe(),
                               export_pubmed.XMLPublicationTypePipe(),
                               export_pubmed.XMLArticleIDListPipe(),
                               export_pubmed.XMLHistoryPipe(),
                               export_pubmed.XMLAbstractPipe(),
                               export_pubmed.XMLClosePipe())

        transformed_data = ppl.run(xylose_article, rewrap=True)

        return next(transformed_data)
Example 26
    def pipeline_rsps(self):
        xylose_article = Article(self._article)

        ppl = plumber.Pipeline(export_rsps.SetupArticlePipe(),
                               export_rsps.XMLArticlePipe(),
                               export_rsps.XMLFrontPipe(),
                               export_rsps.XMLJournalMetaJournalIdPipe(),
                               export_rsps.XMLJournalMetaJournalTitleGroupPipe(),
                               export_rsps.XMLJournalMetaISSNPipe(),
                               export_rsps.XMLJournalMetaPublisherPipe(),
                               export_rsps.XMLArticleMetaArticleIdPublisherPipe(),
                               export_rsps.XMLArticleMetaArticleIdDOIPipe(),
                               export_rsps.XMLArticleMetaArticleCategoriesPipe(),
                               export_rsps.XMLArticleMetaTitleGroupPipe(),
                               export_rsps.XMLArticleMetaTranslatedTitleGroupPipe(),
                               export_rsps.XMLArticleMetaContribGroupPipe(),
                               export_rsps.XMLArticleMetaAffiliationPipe(),
                               export_rsps.XMLArticleMetaGeneralInfoPipe(),
                               export_rsps.XMLArticleMetaHistoryPipe(),
                               export_rsps.XMLArticleMetaPermissionPipe(),
                               export_rsps.XMLArticleMetaAbstractsPipe(),
                               export_rsps.XMLArticleMetaKeywordsPipe(),
                               export_rsps.XMLArticleMetaCountsPipe(),
                               export_rsps.XMLBodyPipe(),
                               export_rsps.XMLArticleMetaCitationsPipe(),
                               export_rsps.XMLSubArticlePipe(),
                               export_rsps.XMLClosePipe())

        transformed_data = ppl.run(xylose_article, rewrap=True)

        return next(transformed_data)
Example 27
    def test_xmlarticle_meta_keywords_without_data_pipe(self):

        fakexylosearticle = Article({
            'article': {
                'v40': [{
                    '_': 'pt'
                }]
            },
            'title': {}
        })

        pxml = ET.Element('records')
        pxml.append(ET.Element('record'))

        data = [fakexylosearticle, pxml]

        xmlarticle = export_doaj.XMLArticleMetaKeywordsPipe()
        raw, xml = xmlarticle.transform(data)

        try:
            xml.find('./record/keywords').text
        except AttributeError:
            self.assertTrue(True)
        else:
            self.assertTrue(False)
Example 28
    def test_xmlarticle_meta_general_info_fulltext_uri_without_data_pipe(self):

        fakexylosearticle = Article({
            'article': {
                'v65': [{
                    '_': '201008'
                }]
            },
            'title': {}
        })

        pxml = ET.Element('records')
        pxml.append(ET.Element('record'))

        data = [fakexylosearticle, pxml]

        xmlarticle = export_doaj.XMLArticleMetaFullTextUrlPipe()
        raw, xml = xmlarticle.transform(data)

        try:
            xml.find('./record/issue').text
        except AttributeError:
            self.assertTrue(True)
        else:
            self.assertTrue(False)
Example 29
def load_documents(collection, all_records=False):

    fltr = {
        'collection': collection
    }

    if all_records is False:
        fltr['body'] = {'$exists': 0}

    documents = articlemeta_db['articles'].find(
        fltr,
        {'code': 1},
    )

    if 'body' in fltr:
        del(fltr['body'])

    pids = []
    for document in documents:
        pids.append(document['code'])

    for pid in pids:
        fltr['code'] = pid
        document = articlemeta_db['articles'].find_one(
            fltr,
            {'_id': 0, 'citations': 0}
        )
        yield Article(document)

    documents.close()
Example 30
def load_documents(collection, all_records=False):

    fltr = {'collection': collection}

    if all_records is False:
        fltr['license'] = {'$exists': 0}

    documents = articlemeta_db['articles'].find(fltr, {'code': 1})

    pids = []
    for document in documents:
        pids.append(document['code'])

    if 'license' in fltr:
        del (fltr['license'])

    for pid in pids:
        fltr['code'] = pid
        document = articlemeta_db['articles'].find_one(fltr, {
            '_id': 0,
            'citations': 0
        })
        yield Article(document)

    documents.close()  # Release the cursor, since it has no timeout.
Example 31
    def test_xmlarticle_meta_keywords_without_data_pipe(self):

        fakexylosearticle = Article({
            'article': {
                'v40': [{
                    '_': 'pt'
                }]
            },
            'title': {}
        })

        pxml = ET.Element('article')
        pxml.append(ET.Element('front'))

        front = pxml.find('front')
        front.append(ET.Element('article-meta'))

        data = [fakexylosearticle, pxml]

        xmlarticle = export_rsps.XMLArticleMetaKeywordsPipe()
        raw, xml = xmlarticle.transform(data)

        keywords_language = xml.find('./front/article-meta/kwd-group')

        self.assertEqual(None, keywords_language)
Example 32
    def _check_article_meta(self, metadata):
        """
            This method will check the given metadata and retrieve
            a new dictionary with some new fields.
        """

        article = Article(metadata)

        issns = set([article.any_issn(priority=u'electronic'),
                    article.any_issn(priority=u'print')])

        metadata['code_issue'] = article.publisher_id[1:18]
        metadata['code_title'] = list(issns)
        metadata['collection'] = article.collection_acronym
        metadata['document_type'] = article.document_type
        metadata['publication_year'] = article.publication_date[0:4]
        metadata['validated_scielo'] = 'False'
        metadata['validated_wos'] = 'False'
        metadata['sent_wos'] = 'False'
        metadata['sent_doaj'] = 'False'
        metadata['applicable'] = 'False'

        try:
            metadata['processing_date'] = article.processing_date
        except Exception:
            if article.publication_date > datetime.now().date().isoformat():
                metadata['processing_date'] = datetime.now().date().isoformat()

        gtk = gen_title_keys(article)
        if gtk:
            metadata.update(gtk)

        gctk = gen_citations_title_keys(article)
        if gctk:
            metadata.update(gctk)

        return metadata
Example 33
def get_solr_args_from_article(document, indexed_date):
    article = Article(document)

    original_title = article.original_title()
    if original_title is not None:
        original_title = original_title

    try:  # publication_date format maybe yyyy-mm-dd
        publication_date = datetime.strptime(article.publication_date, '%Y-%m-%d').isoformat()
    except ValueError:
        try:  # publication_date format maybe yyyy-mm
            publication_date = datetime.strptime("{0}-01".format(article.publication_date), '%Y-%m-%d').isoformat()
        except ValueError:  # publication_date format maybe yyyy
            publication_date = datetime.strptime("{0}-01-01".format(article.publication_date), '%Y-%m-%d').isoformat()

    article_languages = article.languages()
    languages = []
    for l in article_languages:
        languages.append(l)

    article_authors = article.authors
    authors = []
    if article_authors is not None:
        for author in article_authors:
            author_name = u"{0} {1}".format(author["given_names"], author["surname"])
            authors.append(remove_control_chars(author_name))

    article_first_author = article.first_author
    if article_first_author is not None:
        first_author = remove_control_chars(u"{0} {1}".format(article_first_author["given_names"], article_first_author["surname"]))
    else:
        first_author = ""

    #Start - Insert categories and magazines
    # print ('Start - Insert categories and magazines')

    magazine_name = remove_control_chars(u"{0}".format(article.journal.title))
    magazine_issn = article.journal.scielo_issn
    magazine_abbreviated_title = remove_control_chars(article.journal.abbreviated_title)
    magazine_domain = article.scielo_domain
    magazine_acronym = article.journal.acronym


    try:
        magazine = Magazine.objects.get(magazine_name=magazine_name)
    except Magazine.DoesNotExist:
        magazine = Magazine.objects.create(magazine_name=magazine_name,
                                           magazine_abbreviated_title=magazine_abbreviated_title,
                                           magazine_issn=magazine_issn,
                                           magazine_domain=magazine_domain,
                                           magazine_acronym=magazine_acronym)
        magazine.save()

    category_ids = []
    if article.journal.subject_areas is not None:
        for item_category in article.journal.subject_areas:
            category_name = remove_control_chars(u"{0}".format(item_category)).title()

            try:
                category = Category.objects.get(category_name_en=category_name)
            except Category.DoesNotExist:
                category = Category.objects.create(category_name_en=category_name)
                category.save()

            category_ids.append(category.id)

            category_publication_relationship = False
            for category_loop in magazine.categories.all():
                if category_loop.category_name_en == category_name:
                    category_publication_relationship = True
                    break

            if not category_publication_relationship:
                magazine.categories.add(category)
                magazine.save()

    # print ('End - Insert categories and magazines')
    # End - Insert categories and magazines

    args = {
        "id": u"{0}{1}".format(article.publisher_id, article.collection_acronym),
        # "scielo_issn": article.journal.scielo_issn,
        "any_issn": article.journal.any_issn(),
        "journal_title": remove_control_chars(article.journal.title),  # Magazine
        "journal_id": magazine.id,

        "journal_volume": article.volume,
        "journal_number": article.issue,

        # "journal_abbreviated_title": remove_control_chars(article.journal.abbreviated_title),
        "original_title": remove_control_chars(original_title),
        "original_abstract": remove_control_chars(article.original_abstract()),
        "publication_date": "{0}Z".format(publication_date),
        # "journal_acronym": article.journal.acronym,
        "subject_areas": article.journal.subject_areas,  # Categories
        "subject_areas_ids": category_ids,  # Category ids

        "wos_subject_areas": article.journal.wos_subject_areas,

        "original_language": article.original_language(),
        "languages": languages,
        "document_type": article.document_type,
        "authors": authors,
        "first_author": first_author,
        "corporative_authors": article.corporative_authors,
        # "scielo_domain": article.scielo_domain,
        "publisher_id": article.publisher_id,
        "collection_acronym": article.collection_acronym,

        "indexed_date": indexed_date
    }

    # Adding cover if reindexing or updating.
    try:
        cover_article = CoverArticle.objects.get(article_id=args[u"id"])

        args[u"image_upload_path"] = cover_article.image
        args[u"image_upload_date"] = cover_article.upload_time
        args[u"image_uploader"] = cover_article.administrator.name
    except CoverArticle.DoesNotExist:
        pass

    article_translated_abstracts = article.translated_abstracts()
    if article_translated_abstracts is not None:
        for language in article_translated_abstracts:
            args[u"translated_abstracts_{0}".format(language)] = remove_control_chars(article_translated_abstracts[language])

    article_translated_titles = article.translated_titles()
    if article_translated_titles is not None:
        for language in article_translated_titles:
            args[u"translated_titles_{0}".format(language)] = remove_control_chars(article_translated_titles[language])

    article_keywords = article.keywords()
    if article_keywords is not None:
        for language in article_keywords:
            keywords = []
            for keyword in article_keywords[language]:
                keywords.append(remove_control_chars(keyword))
            args[u"keywords_{0}".format(language)] = keywords

    return args