Example #1
0
    def test_query_by_pid_without_cited_by(self):
        # When find() on the collection yields None, query_by_pid must still
        # return the article metadata, with 'cited_by' set to None.
        fixture_article = Article(fixtures.article)

        # Expectation recorded before replay(): controller.load_article is
        # replaced and returns the fixture article for any two arguments.
        load_article_stub = self.mocker.replace(controller.load_article)
        load_article_stub(ANY, ANY)
        self.mocker.result(fixture_article)

        # Expectation recorded before replay(): find() returns None.
        collection = self.mocker.mock()
        collection.find(ANY, ANY)
        self.mocker.result(None)
        self.mocker.replay()

        article_meta = {
            'code': u'S0101-31222002000100038',
            'title': u'Estratégias de luta das enfermeiras da Maternidade Leila Diniz para implantação de um modelo humanizado de assistência ao parto',
            'issn': u'0101-3122',
            'source': u'Revista Brasileira de Sementes',
            'url': u'http://www.scielo.br/scielo.php?script=sci_arttext&pid=S0101-31222002000100038&lng=en&tlng=en',
        }
        expected = {'article': article_meta, 'cited_by': None}

        result = controller.query_by_pid(collection, 'S0101-31222002000100038')

        self.assertEqual(result, expected)
def load_documents(articlemeta_db, collection, all_records=False):
    """Yield an ``Article`` for each selected document of ``collection``.

    The document codes are read first with a single ``find`` query; every
    document is then fetched individually with ``find_one`` (without the
    ``_id`` and ``citations`` fields) and wrapped in ``Article``.

    :param articlemeta_db: mapping-like database handle; its ``'articles'``
        entry must provide ``find`` and ``find_one``.
    :param collection: value matched against the ``collection`` field.
    :param all_records: when it is exactly ``False`` only documents that have
        no ``license`` field are selected; any other value selects every
        document of the collection.
    :returns: a generator of ``Article`` instances.
    """
    articles = articlemeta_db['articles']

    fltr = {'collection': collection}

    if all_records is False:
        fltr['license'] = {'$exists': 0}

    documents = articles.find(fltr, {'code': 1})
    try:
        pids = [document['code'] for document in documents]
    finally:
        # The cursor is fully consumed at this point; release it right away
        # (and also on failure) instead of holding it until the generator
        # is exhausted by the caller.
        documents.close()

    # The per-document lookups below must not be restricted by 'license'.
    fltr.pop('license', None)

    for pid in pids:
        fltr['code'] = pid
        document = articles.find_one(fltr, {'_id': 0, 'citations': 0})
        yield Article(document)
Example #3
0
def load_documents(collection, all_records=False):
    """Yield an ``Article`` for each selected document of ``collection``.

    Uses the module-level ``articlemeta_db``. The document codes are read
    first with a single ``find`` query; every document is then fetched
    individually with ``find_one`` (without the ``_id`` and ``citations``
    fields) and wrapped in ``Article``.

    :param collection: value matched against the ``collection`` field.
    :param all_records: when it is exactly ``False`` only documents that have
        no ``fulltexts`` field are selected; any other value selects every
        document of the collection.
    :returns: a generator of ``Article`` instances.
    """
    articles = articlemeta_db['articles']

    fltr = {'collection': collection}

    if all_records is False:
        fltr['fulltexts'] = {'$exists': 0}

    documents = articles.find(fltr, {'code': 1})
    try:
        pids = [document['code'] for document in documents]
    finally:
        # The cursor is fully consumed at this point; release it right away
        # (and also on failure) instead of holding it until the generator
        # is exhausted by the caller.
        documents.close()

    # The per-document lookups below must not be restricted by 'fulltexts'.
    fltr.pop('fulltexts', None)

    for pid in pids:
        fltr['code'] = pid
        document = articles.find_one(fltr, {'_id': 0, 'citations': 0})
        yield Article(document)
Example #4
0
 def test_should_return_none_if_no_document_dates(self):
     # The fixture carries only v65; the first value returned by
     # get_article_dates must then be None.
     article = Article({"article": {"v65": [{"_": "19970300"}]}})
     pubdate, _issue_date = conversion.get_article_dates(article)
     self.assertIsNone(pubdate)
    def test_xmlarticle_meta_keywords_without_data_pipe(self):
        # The fixture holds no keyword data, so after the pipe runs there
        # must be no <keywords> element under <record>.
        fakexylosearticle = Article({
            'article': {
                'v40': [{
                    '_': 'pt'
                }]
            },
            'title': {}
        })

        pxml = ET.Element('records')
        pxml.append(ET.Element('record'))

        data = [fakexylosearticle, pxml]

        xmlarticle = export_doaj.XMLArticleMetaKeywordsPipe()
        raw, xml = xmlarticle.transform(data)

        # find() returns None when nothing matches; assert that directly
        # instead of provoking an AttributeError through `.text`.
        self.assertIsNone(xml.find('./record/keywords'))
Example #6
0
    def document(self,
                 code,
                 collection,
                 replace_journal_metadata=True,
                 fmt='xylose'):
        """Retrieve one document through ``self.client.get_article``.

        :param code: document code forwarded to the client.
        :param collection: collection identifier forwarded to the client.
        :param replace_journal_metadata: forwarded unchanged to the client.
        :param fmt: requested format; with ``'xylose'`` the response is
            parsed as JSON and wrapped in ``Article``.
        :returns: an ``Article`` when ``fmt`` is ``'xylose'`` (``None`` if the
            parsed payload is empty); otherwise the client response as is.
        :raises ServerError: if the client call fails or, for ``'xylose'``,
            the response cannot be parsed as JSON.
        """
        try:
            # Honour the caller's flag; it used to be hard-coded to True.
            article = self.client.get_article(
                code=code,
                collection=collection,
                replace_journal_metadata=replace_journal_metadata,
                fmt=fmt)
        except Exception:
            # `Exception` rather than a bare except, so KeyboardInterrupt
            # and SystemExit are not converted into ServerError.
            msg = 'Error retrieving document: %s_%s' % (collection, code)
            raise ServerError(msg)

        if fmt != 'xylose':
            logger.info('Document loaded: %s_%s', collection, code)
            return article

        try:
            jarticle = json.loads(article)
        except Exception:
            msg = 'Fail to load JSON when retrienving document: %s_%s' % (
                collection, code)
            raise ServerError(msg)

        if not jarticle:
            logger.warning('Document not found for : %s_%s', collection, code)
            return None

        xarticle = Article(jarticle)
        logger.info('Document loaded: %s_%s', collection, code)

        return xarticle
Example #7
0
 def test_should_return_document_publication_date_if_it_is_presente(self):
     # With both v65 and v223 set, the first value returned by
     # get_article_dates must equal v223's value split into parts.
     fields = {"v65": [{"_": "19970300"}], "v223": [{"_": "20200124"}]}
     article = Article({"article": fields})
     pubdate, _issue_date = conversion.get_article_dates(article)
     self.assertEqual(pubdate, ("2020", "01", "24"))
Example #8
0
    def test_xml_article_body_without_data_pipe(self):
        # The fixture holds no body data, so after XMLBodyPipe runs there
        # must be no <body><p> element in the document.
        fakexylosearticle = Article({
            'article': {
                'v40': [{
                    '_': 'pt'
                }]
            },
            'title': {}
        })

        pxml = ET.Element('article')

        data = [fakexylosearticle, pxml]

        xmlarticle = export_rsps.XMLBodyPipe()

        raw, xml = xmlarticle.transform(data)

        # find() returns None when nothing matches; assert that directly
        # instead of provoking an AttributeError through `.text`.
        self.assertIsNone(xml.find('./body/p'))
Example #9
0
    def test_xmlarticle_meta_article_categories_pipe(self):
        # The fixture sets v40 to 'pt' and provides section labels for 'pt'
        # and 'es'; the heading subject must carry the 'pt' label.
        section_labels = {u'pt': u'label pt', u'es': u'label es'}
        fakexylosearticle = Article({
            'article': {'v40': [{'_': 'pt'}]},
            'section': section_labels,
        })

        # Minimal <article><front><article-meta/></front></article> tree.
        pxml = ET.Element('article')
        front = ET.Element('front')
        pxml.append(front)
        front.append(ET.Element('article-meta'))

        pipe = export_rsps.XMLArticleMetaArticleCategoriesPipe()
        raw, xml = pipe.transform([fakexylosearticle, pxml])

        subject = xml.find(
            './front/article-meta/article-categories/subj-group[@subj-group-type="heading"]/subject'
        )
        heading = subject.text

        self.assertEqual(u'label pt', heading)
Example #10
0
    def test_xml_article_meta_article_id_doi_without_data_pipe(self):
        # The fixture is empty, so after the DOI pipe runs there must be no
        # <article-id pub-id-type="doi"> element under <article-meta>.
        fakexylosearticle = Article({'article': {}, 'title': {}})

        pxml = ET.Element('article')
        pxml.append(ET.Element('front'))

        front = pxml.find('front')
        front.append(ET.Element('article-meta'))

        data = [fakexylosearticle, pxml]

        xmlarticle = export_rsps.XMLArticleMetaArticleIdDOIPipe()

        raw, xml = xmlarticle.transform(data)

        # transform() completing without an exception is part of the check;
        # the element's absence is then asserted directly, since find()
        # returns None when nothing matches.
        self.assertIsNone(
            xml.find('./front/article-meta/article-id[@pub-id-type="doi"]'))
Example #11
0
    def test_xml_article_body_without_data_pipe(self):
        # NOTE: despite the "without_data" name, this fixture does include
        # body text for 'pt' and 'es'; the test expects the 'pt' text in
        # <body><p>.
        bodies = {'pt': 'body pt', 'es': 'body es'}
        fakexylosearticle = Article({
            'article': {'v40': [{'_': 'pt'}]},
            'title': {},
            'body': bodies,
        })

        pxml = ET.Element('article')

        pipe = export_rsps.XMLBodyPipe()
        raw, xml = pipe.transform([fakexylosearticle, pxml])

        paragraph = xml.find('./body/p')
        body = paragraph.text

        self.assertEqual('body pt', body)
Example #12
0
    def test_xml_citation_date_with_year_and_month_and_day_pipe(self):
        # A citation whose v65 is '20060430' must yield separate <year>,
        # <month> and <day> elements under <element-citation><date>.
        article = Article({
            'article': {},
            'title': {},
            'citations': [{'v65': [{'_': '20060430'}]}],
        })
        citation = article.citations[0]

        pxml = ET.Element('ref')
        pxml.append(ET.Element('element-citation'))

        date_pipe = self._xmlcitation.DatePipe()
        raw, xml = date_pipe.transform([citation, pxml])

        # All three lookups happen before any assertion.
        year = xml.find('./element-citation/date/year').text
        month = xml.find('./element-citation/date/month').text
        day = xml.find('./element-citation/date/day').text

        self.assertEqual(u'2006', year)
        self.assertEqual(u'04', month)
        self.assertEqual(u'30', day)
Example #13
0
    def test_xmlarticle_meta_keywords_without_data_pipe(self):
        # The fixture holds no keyword data, so after the pipe runs there
        # must be no <kwd-group> element under <article-meta>.
        fakexylosearticle = Article({
            'article': {
                'v40': [{
                    '_': 'pt'
                }]
            },
            'title': {}
        })

        pxml = ET.Element('article')
        pxml.append(ET.Element('front'))

        front = pxml.find('front')
        front.append(ET.Element('article-meta'))

        data = [fakexylosearticle, pxml]

        xmlarticle = export_rsps.XMLArticleMetaKeywordsPipe()
        raw, xml = xmlarticle.transform(data)

        keywords_group = xml.find('./front/article-meta/kwd-group')

        # Identity check against None states the intent directly.
        self.assertIsNone(keywords_group)
Example #14
0
    def test_xmlarticle_meta_translated_abstract_without_data_pipe(self):
        # The fixture holds no abstract data, so after the pipe runs there
        # must be no <trans-abstract><p> element under <article-meta>.
        fakexylosearticle = Article({
            'article': {
                'v40': [{
                    '_': 'pt'
                }]
            },
            'title': {}
        })

        pxml = ET.Element('article')
        pxml.append(ET.Element('front'))

        front = pxml.find('front')
        front.append(ET.Element('article-meta'))

        data = [fakexylosearticle, pxml]

        xmlarticle = export_rsps.XMLArticleMetaAbstractsPipe()
        raw, xml = xmlarticle.transform(data)

        abstract = xml.find('./front/article-meta/trans-abstract/p')

        # Identity check against None states the intent directly.
        self.assertIsNone(abstract)
Example #15
0
    def pipeline_rsps(self):
        """Run the ``export_rsps`` pipes over ``self._article``.

        ``self._article`` is wrapped in ``Article`` and pushed through the
        pipes in the order listed below.

        :returns: the first item produced by the pipeline run.
        """
        xylose_article = Article(self._article)

        # Order matters: pipes run in list order.
        stages = [
            export_rsps.SetupArticlePipe(),
            export_rsps.XMLArticlePipe(),
            export_rsps.XMLFrontPipe(),
            export_rsps.XMLJournalMetaJournalIdPipe(),
            export_rsps.XMLJournalMetaJournalTitleGroupPipe(),
            export_rsps.XMLJournalMetaISSNPipe(),
            export_rsps.XMLJournalMetaPublisherPipe(),
            export_rsps.XMLArticleMetaArticleIdPublisherPipe(),
            export_rsps.XMLArticleMetaArticleIdDOIPipe(),
            export_rsps.XMLArticleMetaArticleCategoriesPipe(),
            export_rsps.XMLArticleMetaTitleGroupPipe(),
            export_rsps.XMLArticleMetaTranslatedTitleGroupPipe(),
            export_rsps.XMLArticleMetaContribGroupPipe(),
            export_rsps.XMLArticleMetaAffiliationPipe(),
            export_rsps.XMLArticleMetaDatesInfoPipe(),
            export_rsps.XMLArticleMetaIssueInfoPipe(),
            export_rsps.XMLArticleMetaElocationInfoPipe(),
            export_rsps.XMLArticleMetaPagesInfoPipe(),
            export_rsps.XMLArticleMetaHistoryPipe(),
            export_rsps.XMLArticleMetaPermissionPipe(),
            export_rsps.XMLArticleMetaAbstractsPipe(),
            export_rsps.XMLArticleMetaKeywordsPipe(),
            export_rsps.XMLArticleMetaCountsPipe(),
            export_rsps.XMLBodyPipe(),
            export_rsps.XMLArticleMetaCitationsPipe(),
            export_rsps.XMLSubArticlePipe(),
            export_rsps.XMLClosePipe(),
        ]
        ppl = plumber.Pipeline(*stages)

        results = ppl.run(xylose_article, rewrap=True)

        return next(results)
Example #16
0
    def test_xmlarticle_meta_contrib_group_author_without_xrefs_pipe(self):
        # Remove v71 from the raw fixture, build an article from the
        # modified data and check the xref rids emitted for the contributors.
        del self._raw_json['article']['v71']
        article_meta = Article(self._raw_json)

        pxml = ET.Element('article')
        pxml.append(ET.Element('front'))

        front = pxml.find('front')
        front.append(ET.Element('article-meta'))

        # Feed the pipe the article built from the modified JSON, so the
        # test does not rely on self._article_meta wrapping the same dict.
        data = [article_meta, pxml]

        xmlarticle = export_rsps.XMLArticleMetaContribGroupPipe()
        raw, xml = xmlarticle.transform(data)

        rids = [
            xref.get('rid') for xref in xml.findall(
                './front/article-meta/contrib-group/contrib/xref')
        ]

        self.assertEqual([
            u'aff01', u'aff01', u'aff01', u'aff01', u'aff01', u'aff01',
            u'aff02', u'aff01', u'aff02', u'aff01', u'aff03'
        ], rids)
Example #17
0
    def pipeline_doaj(self):
        """Run the ``export_doaj`` pipes over ``self._article``.

        ``self._article`` is wrapped in ``Article`` (created with
        ``iso_format='iso 639-2'``) and pushed through the pipes in the order
        listed below.

        :returns: the first item produced by the pipeline run.
        """
        xylose_article = Article(self._article, iso_format='iso 639-2')

        # Order matters: pipes run in list order.
        stages = [
            export_doaj.SetupArticlePipe(),
            export_doaj.XMLArticlePipe(),
            export_doaj.XMLJournalMetaPublisherPipe(),
            export_doaj.XMLJournalMetaJournalTitlePipe(),
            export_doaj.XMLJournalMetaISSNPipe(),
            export_doaj.XMLArticleMetaPublicationDatePipe(),
            export_doaj.XMLArticleMetaVolumePipe(),
            export_doaj.XMLArticleMetaIssuePipe(),
            export_doaj.XMLArticleMetaStartPagePipe(),
            export_doaj.XMLArticleMetaEndPagePipe(),
            export_doaj.XMLArticleMetaArticleIdDOIPipe(),
            export_doaj.XMLArticleMetaIdPipe(),
            export_doaj.XMLArticleMetaDocumentTypePipe(),
            export_doaj.XMLArticleMetaTitlePipe(),
            export_doaj.XMLArticleMetaAuthorsPipe(),
            export_doaj.XMLArticleMetaAffiliationPipe(),
            export_doaj.XMLArticleMetaAbstractsPipe(),
            export_doaj.XMLArticleMetaFullTextUrlPipe(),
            export_doaj.XMLArticleMetaKeywordsPipe(),
            export_doaj.XMLClosePipe(),
        ]
        ppl = plumber.Pipeline(*stages)

        results = ppl.run(xylose_article, rewrap=True)

        return next(results)
Example #18
0
    def test_xml_document_multiple_wok_subject_categories_pipe(self):
        # Two v854 entries in the 'title' record must produce two
        # wok_subject_categories fields, in the same order.
        categories = [{'_': 'Cat 1'}, {'_': 'Cat 2'}]
        fakexylosearticle = Article({
            'article': {},
            'title': {'v854': categories},
        })

        pxml = ET.Element('doc')

        pipe = pipeline_xml.WOKSC()
        raw, xml = pipe.transform([fakexylosearticle, pxml])

        fields = xml.findall('./field[@name="wok_subject_categories"]')
        result = ', '.join(field.text for field in fields)

        self.assertEqual(u'Cat 1, Cat 2', result)
Example #19
0
 def test_should_return_issue_publication_date_if_it_is_presente(self):
     # The second value returned by get_article_dates must equal v65's
     # value split into parts; "00" as day is expected as an empty string.
     fields = {"v65": [{"_": "19970300"}], "v223": [{"_": "20200124"}]}
     article = Article({"article": fields})
     _document_date, issue_date = conversion.get_article_dates(article)
     self.assertEqual(issue_date, ("1997", "03", ""))
Example #20
0
    def test_xml_journal_title_pipe(self):
        # Runs pipeline_xml.JournalTitle over an article whose 'title'
        # record has v100 set and checks that the lookup below finds nothing.
        fakexylosearticle = Article({
            'article': {},
            'title': {
                "v100": [{
                    "_": "Revista de Sa\u00fade P\u00fablica"
                }]
            }
        })

        pxml = ET.Element('doc')

        data = [fakexylosearticle, pxml]

        xmlarticle = pipeline_xml.JournalTitle()

        raw, xml = xmlarticle.transform(data)

        # transform() completing without an exception is part of the check.
        # NOTE(review): the predicate `[name="journal"]` selects a <field>
        # that has a child element <name> with that text, not a `name`
        # attribute (that would be `[@name="journal"]`), so this assertion
        # may hold vacuously even though the fixture carries a v100 value -
        # confirm the intended expression and expectation.
        self.assertIsNone(xml.find('./field[name="journal"]'))
Example #21
0
 def test_should_return_creation_date_if_no_document_publication_date(self):
     # With v65 and v93 set (and no v223), the first value returned by
     # get_article_dates must equal v93's value split into parts.
     fields = {"v65": [{"_": "19970300"}], "v93": [{"_": "20000401"}]}
     article = Article({"article": fields})
     pubdate, _issue_date = conversion.get_article_dates(article)
     self.assertEqual(pubdate, ("2000", "04", "01"))
Example #22
0
    def test_xml_start_page_pipe(self):
        # Runs pipeline_xml.StartPage over an article whose v14 carries
        # 'f' and 'l' subfields and checks that the lookup below finds
        # nothing.
        fakexylosearticle = Article({
            'article': {
                "v14": [{
                    "l": "649",
                    "_": "",
                    "f": "639"
                }]
            },
            'title': {}
        })

        pxml = ET.Element('doc')

        data = [fakexylosearticle, pxml]

        xmlarticle = pipeline_xml.StartPage()

        raw, xml = xmlarticle.transform(data)

        # transform() completing without an exception is part of the check.
        # NOTE(review): the predicate `[name="start_page"]` selects a <field>
        # that has a child element <name> with that text, not a `name`
        # attribute (that would be `[@name="start_page"]`), so this assertion
        # may hold vacuously even though the fixture carries page data -
        # confirm the intended expression and expectation.
        self.assertIsNone(xml.find('./field[name="start_page"]'))
    def test_xmlarticle_meta_general_info_fulltext_uri_without_data_pipe(self):
        # Runs the DOAJ full-text URL pipe over an article that only has
        # v65 set and checks that no <issue> element exists under <record>.
        fakexylosearticle = Article({
            'article': {
                'v65': [{
                    '_': '201008'
                }]
            },
            'title': {}
        })

        pxml = ET.Element('records')
        pxml.append(ET.Element('record'))

        data = [fakexylosearticle, pxml]

        xmlarticle = export_doaj.XMLArticleMetaFullTextUrlPipe()
        raw, xml = xmlarticle.transform(data)

        # NOTE(review): the pipe under test is XMLArticleMetaFullTextUrlPipe
        # but the lookup targets './record/issue' - confirm this is the
        # element the test is meant to check.
        # find() returns None when nothing matches; assert that directly
        # instead of provoking an AttributeError through `.text`.
        self.assertIsNone(xml.find('./record/issue'))
Example #24
0
def query_by_meta(coll, title='', author='', year=''):
    """Look up the documents citing a work described by its metadata.

    A key is derived from ``title``, ``author`` and ``year`` with
    ``preparing_key`` and matched against the ``citations_keys`` field of
    ``coll``.

    :param coll: collection object providing ``find``.
    :param title: title of the cited work.
    :param author: author of the cited work.
    :param year: year of the cited work.
    :returns: ``None`` when no key could be derived; otherwise a dict with
        the given metadata under ``'article'`` and, under ``'cited_by'``,
        the ``load_document_meta`` result of each matching document (or
        ``None`` when ``find`` returned a falsy value).
    """
    title_key = preparing_key(title, author, year)
    if not title_key:
        return None

    article_meta = {'title': title, 'author': author, 'year': year}

    projection = {'article': 1, 'title': 1, 'collection': 1}
    cursor = coll.find({'citations_keys': title_key}, projection)

    citations = None
    if cursor:
        citations = [load_document_meta(Article(doc)) for doc in cursor]

    return {'article': article_meta, 'cited_by': citations}
    def document(self,
                 code,
                 collection=None,
                 replace_journal_metadata=True,
                 fmt='xylose'):
        """Retrieve one document through ``self.client.get_article``.

        :param code: document code forwarded to the client.
        :param collection: optional collection identifier; only sent to the
            client when truthy.
        :param replace_journal_metadata: forwarded unchanged to the client.
        :param fmt: requested format; with ``'xylose'`` the response is
            parsed as JSON and wrapped in ``Article``.
        :returns: an ``Article`` when ``fmt`` is ``'xylose'``; otherwise the
            client response as is.
        :raises ServerError: if the client call fails.
        """
        query = {
            'code': code,
            'replace_journal_metadata': replace_journal_metadata,
            'fmt': fmt
        }

        if collection:
            query['collection'] = collection

        try:
            article = self.client.get_article(**query)
        except Exception:
            # `Exception` rather than a bare except, so KeyboardInterrupt
            # and SystemExit are not converted into ServerError.
            msg = 'Error retrieving document: %s_%s' % (collection, code)
            raise ServerError(msg)

        if fmt == 'xylose':
            article = Article(json.loads(article))

        # Same message for both formats; arguments are passed lazily so the
        # string is only built when the record is actually emitted.
        logger.info('Document loaded: %s_%s', collection, code)
        return article
Example #26
0
def query_by_pid(coll, pid):
    """Look up the documents citing the article identified by ``pid``.

    The article is loaded with ``load_article``; its title keys (from
    ``load_article_title_keys``) are matched against the ``citations_keys``
    field of ``coll``.

    :param coll: collection object providing ``find``.
    :param pid: identifier of the article, passed to ``load_article``.
    :returns: ``None`` when the article cannot be loaded; otherwise a dict
        with the article's ``load_document_meta`` result under ``'article'``
        and, under ``'cited_by'``, the ``load_document_meta`` result of each
        matching document (or ``None`` when ``find`` returned a falsy value).
    """
    article = load_article(coll, pid)
    if not article:
        return None

    title_keys = load_article_title_keys(article)

    criteria = {'citations_keys': {'$in': title_keys}}
    projection = {'article': 1, 'title': 1, 'collection': 1}
    cursor = coll.find(criteria, projection)

    citations = None
    if cursor:
        citations = [load_document_meta(Article(doc)) for doc in cursor]

    article_meta = load_document_meta(article)

    return {'article': article_meta, 'cited_by': citations}
Example #27
0
    def document(self,
                 code,
                 collection,
                 replace_journal_metadata=True,
                 fmt='xylose',
                 body=False):
        """Retrieve one document through ``self.dispatcher('get_article')``.

        :param code: document code forwarded to the dispatcher.
        :param collection: collection identifier forwarded to the dispatcher.
        :param replace_journal_metadata: forwarded unchanged to the
            dispatcher.
        :param fmt: requested format; with ``'xylose'`` or ``'opac'`` the
            response is parsed as JSON and wrapped in ``Article``.
        :param body: forwarded unchanged to the dispatcher.
        :returns: ``None`` when the dispatcher returns a falsy value; an
            ``Article`` for the ``'xylose'`` and ``'opac'`` formats;
            otherwise the dispatcher response as is.
        :raises ValueError: if the response cannot be parsed as JSON.
        """
        # Honour the caller's flag; it used to be hard-coded to True.
        article = self.dispatcher(
            'get_article',
            code=code,
            collection=collection,
            replace_journal_metadata=replace_journal_metadata,
            fmt=fmt,
            body=body)

        if not article:
            logger.warning('Document not found for: %s_%s', collection, code)
            return None

        if fmt in ['xylose', 'opac']:
            try:
                jarticle = json.loads(article)
            except Exception:
                # `Exception` rather than a bare except, so KeyboardInterrupt
                # and SystemExit are not converted into ValueError.
                msg = 'Fail to load JSON when retrienving document: %s_%s' % (
                    collection, code)
                raise ValueError(msg)

            xarticle = Article(jarticle)
            logger.info('Document loaded: %s_%s', collection, code)

            return xarticle

        logger.info('Document loaded: %s_%s', collection, code)
        return article
Example #28
0
    def pipeline_pubmed(self):
        """Run the ``export_pubmed`` pipes over ``self._article``.

        ``self._article`` is wrapped in ``Article`` (created with
        ``iso_format='iso 639-2'``) and pushed through the pipes in the order
        listed below.

        :returns: the first item produced by the pipeline run.
        """
        xylose_article = Article(self._article, iso_format='iso 639-2')

        # Order matters: pipes run in list order.
        stages = [
            export_pubmed.SetupArticleSetPipe(),
            export_pubmed.XMLArticlePipe(),
            export_pubmed.XMLJournalPipe(),
            export_pubmed.XMLPublisherNamePipe(),
            export_pubmed.XMLJournalTitlePipe(),
            export_pubmed.XMLISSNPipe(),
            export_pubmed.XMLVolumePipe(),
            export_pubmed.XMLIssuePipe(),
            export_pubmed.XMLPubDatePipe(),
            export_pubmed.XMLReplacesPipe(),
            export_pubmed.XMLArticleTitlePipe(),
            export_pubmed.XMLFirstPagePipe(),
            export_pubmed.XMLLastPagePipe(),
            export_pubmed.XMLElocationIDPipe(),
            export_pubmed.XMLLanguagePipe(),
            export_pubmed.XMLAuthorListPipe(),
            export_pubmed.XMLPublicationTypePipe(),
            export_pubmed.XMLArticleIDListPipe(),
            export_pubmed.XMLHistoryPipe(),
            export_pubmed.XMLAbstractPipe(),
            export_pubmed.XMLClosePipe(),
        ]
        ppl = plumber.Pipeline(*stages)

        results = ppl.run(xylose_article, rewrap=True)

        return next(results)
Example #29
0
    def pipeline_sci(self):
        """Run the ``export_sci`` pipes over ``self._article``.

        ``self._article`` is wrapped in ``Article`` and pushed through the
        pipes in the order listed below.

        :returns: the first item produced by the pipeline run.
        """
        xylose_article = Article(self._article)

        # Order matters: pipes run in list order.
        stages = [
            export_sci.SetupArticlePipe(),
            export_sci.XMLArticlePipe(),
            export_sci.XMLFrontPipe(),
            export_sci.XMLJournalMetaJournalIdPipe(),
            export_sci.XMLJournalMetaJournalTitleGroupPipe(),
            export_sci.XMLJournalMetaISSNPipe(),
            export_sci.XMLJournalMetaCollectionPipe(),
            export_sci.XMLJournalMetaPublisherPipe(),
            export_sci.XMLArticleMetaUniqueArticleIdPipe(),
            export_sci.XMLArticleMetaArticleIdPublisherPipe(),
            export_sci.XMLArticleMetaArticleIdDOIPipe(),
            export_sci.XMLArticleMetaArticleCategoriesPipe(),
            export_sci.XMLArticleMetaTitleGroupPipe(),
            export_sci.XMLArticleMetaTranslatedTitleGroupPipe(),
            export_sci.XMLArticleMetaContribGroupPipe(),
            export_sci.XMLArticleMetaAffiliationPipe(),
            export_sci.XMLArticleMetaGeneralInfoPipe(),
            export_sci.XMLArticleMetaAbstractsPipe(),
            export_sci.XMLArticleMetaKeywordsPipe(),
            export_sci.XMLArticleMetaCitationsPipe(),
            export_sci.XMLClosePipe(),
        ]
        ppl = plumber.Pipeline(*stages)

        results = ppl.run(xylose_article, rewrap=True)

        return next(results)
    def setUp(self):
        # Load the JSON fixture stored in the 'fixtures' directory next to
        # this module. os.path.join keeps the path relative when dirname()
        # is empty, where plain concatenation would yield '/fixtures/...'.
        fixture_path = os.path.join(
            os.path.dirname(__file__), 'fixtures', 'article_meta.json')

        # The context manager closes the file handle deterministically.
        with open(fixture_path) as fixture:
            self._raw_json = json.load(fixture)

        # First citation of the fixture article, used as pipe input.
        self._citation_meta = Article(self._raw_json).citations[0]

        self._xmlcitation = export_rsps.XMLCitation()