예제 #1
0
    def test_06(self):
        """Merging article A into B: B adopts A's id but keeps its own bibjson."""
        first_id = uuid.uuid4().hex
        second_id = uuid.uuid4().hex

        original = models.Article()
        original.set_id(first_id)
        original_bib = original.bibjson()
        original_bib.title = "Example A article with a fulltext url"
        original_bib.abstract = "a bunch of text"
        original_bib.add_url(
            "http://www.sbe.deu.edu.tr/dergi/cilt15.say%C4%B12/06%20AKALIN.pdf",
            urltype="fulltext")

        replacement = models.Article()
        replacement.set_id(second_id)
        replacement_bib = replacement.bibjson()
        replacement_bib.title = "Example B article with a fulltext url"
        replacement_bib.add_url(
            "http://www.sbe.deu.edu.tr/dergi/cilt15.say%C4%B12/06%20AKALIN.pdf",
            urltype="fulltext")

        assert replacement.id == second_id

        replacement.merge(original)

        # after the merge: the original's id is adopted, the replacement's
        # title survives, and the abstract is not inherited
        assert replacement.id == first_id, (replacement.id, first_id, second_id)
        assert replacement.bibjson().title == "Example B article with a fulltext url"
        assert replacement.bibjson().abstract is None
예제 #2
0
    def test_02(self):
        """An article sharing a fulltext URL should be found as a duplicate by the xwalk."""
        url = "http://www.sbe.deu.edu.tr/dergi/cilt15.say%C4%B12/06%20AKALIN.pdf"

        # build and store an example article
        stored = models.Article()
        stored_bib = stored.bibjson()
        stored_bib.title = "Example 2 article with a fulltext url"
        stored_bib.add_url(url, urltype="fulltext")
        stored.save()

        # give the index time to catch up
        time.sleep(2)

        # a candidate replacement pointing at the same fulltext
        incoming = models.Article()
        incoming_bib = incoming.bibjson()
        incoming_bib.title = "Replacement article for fulltext url"
        incoming_bib.add_url(url, urltype="fulltext")

        # the xwalk should surface the stored article
        xwalk = article.XWalk()
        duplicate = xwalk.get_duplicate(incoming)

        assert duplicate is not None
        assert duplicate.bibjson().title == "Example 2 article with a fulltext url"
예제 #3
0
    def test_07_both_duplication_criteria(self):
        """Check that an article is only reported once if it is duplicated by both DOI and fulltext URL"""
        # make ourselves an example article carrying both duplication keys
        ftu = "http://www.sbe.deu.edu.tr/dergi/cilt15.say%C4%B12/06%20AKALIN.pdf"
        doi = "10.doi/123"

        a = models.Article()
        b = a.bibjson()
        b.title = "Example article with a fulltext url and a DOI"
        b.add_url(ftu, urltype="fulltext")
        b.add_identifier('doi', doi)
        a.save(blocking=True)

        # create another article matching on BOTH criteria
        z = models.Article()
        y = z.bibjson()
        y.title = "Replacement article for fulltext url and a DOI"
        y.add_url(ftu, urltype="fulltext")
        y.add_identifier('doi', doi)

        # determine if there's a duplicate
        articleService = DOAJ.articleService()
        d = articleService.get_duplicates(z)

        # the matched article must be reported exactly once, not once per criterion
        assert len(d) == 1
        # was a Python 2 `print` statement, which is a SyntaxError on Python 3;
        # parenthesised form behaves identically on both interpreters
        print(len(d))
        assert d[0].bibjson(
        ).title == "Example article with a fulltext url and a DOI"
예제 #4
0
    def to_article_model(self, existing=None):
        """Build a models.Article from this object's data, merging into *existing* if given."""
        record = deepcopy(self.data)
        bib = record["bibjson"]

        # lift page numbers out of the journal sub-object to the bibjson root
        if "journal" in bib and "start_page" in bib.get("journal", {}):
            bib["start_page"] = bib["journal"]["start_page"]
            del bib["journal"]["start_page"]
        if "journal" in bib and "end_page" in bib.get("journal", {}):
            bib["end_page"] = bib["journal"]["end_page"]
            del bib["journal"]["end_page"]

        # strip admin fields that are not settable via the API
        for forbidden in ("in_doaj", "seal", "upload_id"):
            if "admin" in record and forbidden in record["admin"]:
                del record["admin"][forbidden]

        if existing is None:
            return models.Article(**record)
        merged = dataobj.merge_outside_construct(self._struct, record,
                                                 existing.data)
        return models.Article(**merged)
예제 #5
0
    def test_01_same_fulltext(self):
        """Check duplication detection on articles with the same fulltext URL"""

        # a spread of URLs to exercise the matching
        urls = [
            "http://examplejournal.telfor.rs/Published/Vol1No1/Vol1No1_A5.pdf",
            "http://www.sbe.deu.edu.tr/dergi/cilt15.say%C4%B12/06%20AKALIN.pdf",
            "http://www.ujcem.med.sumdu.edu.ua/images/sampledata/2013/4/408_412_IV-020.pdf",
            "http://www.psychologie-aktuell.com/fileadmin/download/ptam/1-2014_20140324/01_Geiser.pdf"
        ]

        for url in urls:
            # store an example article carrying this fulltext url
            stored = models.Article()
            stored_bib = stored.bibjson()
            stored_bib.title = "Example article with a fulltext url"
            stored_bib.add_url(url, urltype="fulltext")
            stored.save(blocking=True)

            # a replacement pointing at the same fulltext
            incoming = models.Article()
            incoming_bib = incoming.bibjson()
            incoming_bib.title = "Replacement article for fulltext url"
            incoming_bib.add_url(url, urltype="fulltext")

            # the service should surface the stored article as the duplicate
            svc = DOAJ.articleService()
            match = svc.get_duplicate(incoming)

            assert match is not None
            assert match.bibjson().title == "Example article with a fulltext url"
예제 #6
0
    def test_03(self):
        """Duplicate detection via a shared fulltext URL (ujcem example)."""
        url = "http://www.ujcem.med.sumdu.edu.ua/images/sampledata/2013/4/408_412_IV-020.pdf"

        # build and store an example article
        stored = models.Article()
        stored_bib = stored.bibjson()
        stored_bib.title = "Example 2 article with a fulltext url"
        stored_bib.add_url(url, urltype="fulltext")
        stored.save()

        # give the index time to catch up
        time.sleep(2)

        # a candidate replacement pointing at the same fulltext
        incoming = models.Article()
        incoming_bib = incoming.bibjson()
        incoming_bib.title = "Replacement article for fulltext url"
        incoming_bib.add_url(url, urltype="fulltext")

        # the xwalk should surface the stored article
        xwalk = article.XWalk()
        duplicate = xwalk.get_duplicate(incoming)

        assert duplicate is not None
        assert duplicate.bibjson().title == "Example 2 article with a fulltext url"
예제 #7
0
    def test_04(self):
        """Duplicate detection via a shared fulltext URL (psychologie-aktuell example)."""
        url = "http://www.psychologie-aktuell.com/fileadmin/download/ptam/1-2014_20140324/01_Geiser.pdf"

        # build and store an example article
        stored = models.Article()
        stored_bib = stored.bibjson()
        stored_bib.title = "Example 2 article with a fulltext url"
        stored_bib.add_url(url, urltype="fulltext")
        stored.save()

        # give the index time to catch up
        time.sleep(2)

        # a candidate replacement pointing at the same fulltext
        incoming = models.Article()
        incoming_bib = incoming.bibjson()
        incoming_bib.title = "Replacement article for fulltext url"
        incoming_bib.add_url(url, urltype="fulltext")

        # the xwalk should surface the stored article
        xwalk = article.XWalk()
        duplicate = xwalk.get_duplicate(incoming)

        assert duplicate is not None
        assert duplicate.bibjson().title == "Example 2 article with a fulltext url"
예제 #8
0
    def test_01_duplicates_report(self):
        """Check duplication reporting across all articles in the index"""

        # Create 2 identical articles, a duplicate pair
        article1 = models.Article(**ArticleFixtureFactory.make_article_source(
            eissn='1111-1111',
            pissn='2222-2222',
            with_id=False,
            in_doaj=True,
            with_journal_info=True))
        a1_doi = article1.bibjson().get_identifiers('doi')
        assert a1_doi is not None
        article1.save(blocking=True)

        # pause so the two articles get distinct timestamps
        time.sleep(1)

        # second article built from the same fixture, so it shares the DOI
        article2 = models.Article(**ArticleFixtureFactory.make_article_source(
            eissn='1111-1111',
            pissn='2222-2222',
            with_id=False,
            in_doaj=True,
            with_journal_info=True))
        a2_doi = article2.bibjson().get_identifiers('doi')
        assert a2_doi == a1_doi
        article2.save(blocking=True)

        # Run the reporting task as the system user, writing output to TMP_DIR
        user = app.config.get("SYSTEM_USERNAME")
        job = article_duplicate_report.ArticleDuplicateReportBackgroundTask.prepare(
            user, outdir=TMP_DIR)
        task = article_duplicate_report.ArticleDuplicateReportBackgroundTask(
            job)
        task.run()

        # The audit log should show we saved the reports to the TMP_DIR defined above
        audit_1 = job.audit.pop(0)
        assert audit_1.get('message', '').endswith(TMP_DIR)
        assert os.path.exists(TMP_DIR + '/duplicate_articles_global_' +
                              dates.today() + '.csv')

        # It should also clean up its interim article csv
        assert not os.path.exists(
            paths.rel2abs(__file__, 'tmp_article_duplicate_report'))

        # The duplicates should be detected and appear in the report and audit summary count
        with open(TMP_DIR + '/duplicate_articles_global_' + dates.today() +
                  '.csv') as f:
            csvlines = f.readlines()
            # We expect one result line + headings: our newest article has 1 duplicate
            res = csvlines.pop()
            assert res.startswith(
                article2.id
            )  # The newest comes first, so article1 is article2's duplicate.
            assert article1.id in res
            assert 'doi+fulltext' in res

        # the next audit entry summarises the run; exact wording is asserted
        audit_2 = job.audit.pop(0)
        assert audit_2.get(
            'message', ''
        ) == '2 articles processed for duplicates. 1 global duplicate sets found.'
예제 #9
0
    def test_05_full_doi(self):
        """ Test that we still detect duplicate DOIs when we have the full URI, not just the 10. """
        # store an article identified by a full https DOI URI
        stored = models.Article()
        stored_bib = stored.bibjson()
        stored_bib.title = "Example A article with a DOI"
        stored_bib.add_identifier('doi', "https://doi.org/10.doi/123")
        stored.save(blocking=True)

        # an article with a different DOI must not be matched
        unrelated = models.Article()
        unrelated_bib = unrelated.bibjson()
        unrelated_bib.title = "Example C article with a DOI"
        unrelated_bib.add_identifier(
            'doi', "https://doi.org/10.doi/DIFFERENT")
        unrelated.save(blocking=True)

        # the incoming article uses the http scheme for the same DOI
        incoming = models.Article()
        incoming_bib = incoming.bibjson()
        incoming_bib.title = "Replacement article for DOI"
        incoming_bib.add_identifier('doi', "http://doi.org/10.doi/123")

        # exactly one duplicate should be found despite the scheme difference
        svc = DOAJ.articleService()
        matches = svc.get_duplicates(incoming)
        assert len(matches) == 1

        # Check when we ask for one duplicate we get the most recent duplicate.
        best = svc.get_duplicate(incoming)
        assert best is not None
        assert best.bibjson().title == "Example A article with a DOI", best.bibjson().title
예제 #10
0
    def test_04_with_doi_instead(self):
        """Detect a duplicate using the DOI field."""
        # store an article carrying the DOI we will match on
        stored = models.Article()
        stored_bib = stored.bibjson()
        stored_bib.title = "Example A article with a DOI"
        stored_bib.add_identifier('doi', "10.doi/123")
        stored.save(blocking=True)

        # an article with a different DOI must not be matched
        unrelated = models.Article()
        unrelated_bib = unrelated.bibjson()
        unrelated_bib.title = "Example C article with a DOI"
        unrelated_bib.add_identifier('doi', "10.doi/DIFFERENT")
        unrelated.save(blocking=True)

        # the incoming article shares the stored article's DOI
        incoming = models.Article()
        incoming_bib = incoming.bibjson()
        incoming_bib.title = "Replacement article for DOI"
        incoming_bib.add_identifier('doi', "10.doi/123")

        # exactly one duplicate should be found
        svc = DOAJ.articleService()
        matches = svc.get_duplicates(incoming)
        assert len(matches) == 1

        # Check when we ask for one duplicate we get the most recent duplicate.
        best = svc.get_duplicate(incoming)
        assert best is not None
        assert best.bibjson().title == "Example A article with a DOI", best.bibjson().title
예제 #11
0
파일: test_query.py 프로젝트: mauromsl/doaj
    def test_09_search(self):
        # Bring it together: 3 articles in DOAJ plus 1 not in DOAJ.
        # Pre-filters must restrict the query to the 3 public articles, and
        # post-filters must scrub non-public admin data from the results.
        svc = QueryService()

        stored = []
        for _ in range(3):
            art = models.Article(**ArticleFixtureFactory.make_article_source(
                with_id=False))
            stored.append(art)
            assert art.publisher_record_id() == 'some_identifier'
            art.save(blocking=True)
        hidden = models.Article(**ArticleFixtureFactory.make_article_source(
            with_id=False, in_doaj=False))
        stored.append(hidden)
        hidden.save(blocking=True)

        res = svc.search('query', 'article',
                         {"query": {"match_all": {}}},
                         account=None,
                         additional_parameters={})
        assert res['hits']['total'] == 3, res['hits']['total']

        # the publisher record id must have been scrubbed from every hit
        for hit in res['hits']['hits']:
            scrubbed = models.Article(**hit)
            assert scrubbed.publisher_record_id() is None, scrubbed.publisher_record_id()
예제 #12
0
    def test_02_merge(self):
        """Merging a replacement into an article should record exactly one history entry."""
        # make ourselves an example article
        a = models.Article()
        b = a.bibjson()
        b.title = "Example 2 article with a fulltext url"
        b.add_url(
            "http://examplejournal.telfor.rs/Published/Vol1No1/Vol1No1_A5.pdf",
            urltype="fulltext")
        a.save()

        # create a replacement article
        z = models.Article()
        y = z.bibjson()
        y.title = "Replacement article for fulltext url"
        y.add_url(
            "http://examplejournal.telfor.rs/Published/Vol1No1/Vol1No1_A5.pdf",
            urltype="fulltext")

        # do a merge
        z.merge(a)

        # let the index catch up, then we can check this worked
        time.sleep(2)

        hist = models.ArticleHistory.get_history_for(a.id)
        # were Python 2 `print` statements (SyntaxError on Python 3);
        # parenthesised form behaves identically on both interpreters
        print(hist)
        print(len(hist))
        assert len(hist) == 1
        # the history entry holds the pre-merge bibjson
        assert hist[0].data.get(
            "bibjson",
            {}).get("title") == "Example 2 article with a fulltext url"
예제 #13
0
    def test_01(self):
        """Duplicate detection via a shared fulltext URL (telfor example)."""
        url = "http://examplejournal.telfor.rs/Published/Vol1No1/Vol1No1_A5.pdf"

        # build and store an example article
        stored = models.Article()
        stored_bib = stored.bibjson()
        stored_bib.title = "Example article with a fulltext url"
        stored_bib.add_url(url, urltype="fulltext")
        stored.save()

        # give the index time to catch up
        time.sleep(2)

        # a candidate replacement pointing at the same fulltext
        incoming = models.Article()
        incoming_bib = incoming.bibjson()
        incoming_bib.title = "Replacement article for fulltext url"
        incoming_bib.add_url(url, urltype="fulltext")

        # the xwalk should surface the stored article
        xwalk = article.XWalk()
        duplicate = xwalk.get_duplicate(incoming)

        assert duplicate is not None
        assert duplicate.bibjson().title == "Example article with a fulltext url"
예제 #14
0
    def test_02_merge(self):
        """Merging should write exactly one history file holding the pre-merge bibjson."""
        source = models.Article()
        source_bib = source.bibjson()
        source_bib.title = "Example 2 article with a fulltext url"
        source_bib.add_url(
            "http://examplejournal.telfor.rs/Published/Vol1No1/Vol1No1_A5.pdf",
            urltype="fulltext")
        source.save()

        # a replacement article with the same fulltext url
        replacement = models.Article()
        replacement_bib = replacement.bibjson()
        replacement_bib.title = "Replacement article for fulltext url"
        replacement_bib.add_url(
            "http://examplejournal.telfor.rs/Published/Vol1No1/Vol1No1_A5.pdf",
            urltype="fulltext")

        # perform the merge
        replacement.merge(source)

        history_files = self.list_today_article_history_files()
        assert len(history_files) == 1

        # the single history record should hold the pre-merge article data
        with open(history_files[0], 'rb') as handle:
            recorded = json.loads(handle.read())
        assert recorded
        assert recorded.get("bibjson", {}).get(
            "title") == "Example 2 article with a fulltext url"
예제 #15
0
    def test_04_with_doi_instead(self):
        """Detect a duplicate using the DOI field."""
        # first stored article with the shared DOI
        first = models.Article()
        first_bib = first.bibjson()
        first_bib.title = "Example A article with a DOI"
        first_bib.add_identifier('doi', "10.doi/123")
        first.save(blocking=True)

        # Wait a second to ensure the timestamps are different
        time.sleep(1.01)

        second = models.Article()
        second_bib = second.bibjson()
        second_bib.title = "Example B article with a DOI"
        second_bib.add_identifier('doi', "10.doi/123")
        second.save(blocking=True)

        # an article with a different DOI must not be matched
        unrelated = models.Article()
        unrelated_bib = unrelated.bibjson()
        unrelated_bib.title = "Example C article with a DOI"
        unrelated_bib.add_identifier('doi', "10.doi/DIFFERENT")
        unrelated.save(blocking=True)

        # the incoming article shares its DOI with the first two
        incoming = models.Article()
        incoming_bib = incoming.bibjson()
        incoming_bib.title = "Replacement article for DOI"
        incoming_bib.add_identifier('doi', "10.doi/123")

        svc = DOAJ.articleService()
        found = svc.get_duplicates(incoming)
        assert len(found) == 2

        # a single-duplicate request should give the most recent match
        best = svc.get_duplicate(incoming)
        assert best is not None
        assert best.bibjson().title == "Example B article with a DOI", best.bibjson().title

        # full duplicate list: sort both sides the same way so ordering
        # differences cannot cause a false alarm
        expected = sorted([first, second])
        matches = svc.get_duplicates(incoming)
        assert isinstance(matches, list)
        assert matches
        assert len(matches) == 2
        matches.sort()
        assert expected == matches
예제 #16
0
    def test_03_retrieve_multiple_conflict(self):
        """Two equally-matched duplicates should make get_duplicate raise a conflict."""
        ftu = "http://www.sbe.deu.edu.tr/dergi/cilt15.say%C4%B12/06%20AKALIN.pdf"

        # first stored article with the shared fulltext url
        first = models.Article()
        first_bib = first.bibjson()
        first_bib.title = "Example A article with a fulltext url"
        first_bib.add_url(ftu, urltype="fulltext")
        first.save(blocking=True)

        # Wait a second to ensure the timestamps are different
        time.sleep(1.01)

        second = models.Article()
        second_bib = second.bibjson()
        second_bib.title = "Example B article with a fulltext url"
        second_bib.add_url(ftu, urltype="fulltext")
        second.save(blocking=True)

        # this one points elsewhere and must not be matched
        unrelated = models.Article()
        unrelated_bib = unrelated.bibjson()
        unrelated_bib.title = "Example C article with a fulltext url"
        unrelated_bib.add_url("http://this.is/a/different/url",
                              urltype="fulltext")
        unrelated.save(blocking=True)

        # the incoming article matches both stored articles
        incoming = models.Article()
        incoming_bib = incoming.bibjson()
        incoming_bib.title = "Replacement article for fulltext url"
        incoming_bib.add_url(ftu, urltype="fulltext")

        # a single-duplicate request is ambiguous here, so it must raise
        svc = DOAJ.articleService()
        with self.assertRaises(ArticleMergeConflict):
            svc.get_duplicate(incoming)

        # full duplicate list, newest first; sort both sides identically so
        # ordering differences cannot cause a false alarm
        newest_first = lambda art: datetime.strptime(art.last_updated,
                                                     "%Y-%m-%dT%H:%M:%SZ")
        expected = [first, second]
        expected.sort(key=newest_first, reverse=True)
        matches = svc.get_duplicates(incoming)
        assert isinstance(matches, list), matches
        assert matches is not None
        matches.sort(key=newest_first, reverse=True)
        assert expected == matches
예제 #17
0
    def test_03_retrieve_latest(self):
        """get_duplicate should prefer the most recently saved of several matches."""
        ftu = "http://www.sbe.deu.edu.tr/dergi/cilt15.say%C4%B12/06%20AKALIN.pdf"

        # first stored article with the shared fulltext url
        first = models.Article()
        first_bib = first.bibjson()
        first_bib.title = "Example A article with a fulltext url"
        first_bib.add_url(ftu, urltype="fulltext")
        first.save(blocking=True)

        # Wait a second to ensure the timestamps are different
        time.sleep(1.01)

        second = models.Article()
        second_bib = second.bibjson()
        second_bib.title = "Example B article with a fulltext url"
        second_bib.add_url(ftu, urltype="fulltext")
        second.save(blocking=True)

        # this one points elsewhere and must not be matched
        unrelated = models.Article()
        unrelated_bib = unrelated.bibjson()
        unrelated_bib.title = "Example C article with a fulltext url"
        unrelated_bib.add_url("http://this.is/a/different/url",
                              urltype="fulltext")
        unrelated.save(blocking=True)

        # the incoming article matches both stored articles
        incoming = models.Article()
        incoming_bib = incoming.bibjson()
        incoming_bib.title = "Replacement article for fulltext url"
        incoming_bib.add_url(ftu, urltype="fulltext")

        svc = DOAJ.articleService()
        best = svc.get_duplicate(incoming)

        # the single-duplicate request must return the most recently saved match
        assert best is not None
        assert best.bibjson().title == "Example B article with a fulltext url", best.bibjson().title

        # full duplicate list: sort both sides so ordering differences cannot
        # cause a false alarm
        expected = sorted([first, second])
        matches = svc.get_duplicates(incoming)
        assert isinstance(matches, list), matches
        assert matches is not None
        matches.sort()
        assert expected == matches
예제 #18
0
    def to_article_model(self, existing=None):
        """Convert this data object into a models.Article, merging with *existing* if supplied."""
        record = deepcopy(self.data)
        bib = record["bibjson"]

        # lift the page numbers from the journal sub-object up to the bibjson root
        if "journal" in bib and "start_page" in bib.get("journal", {}):
            bib["start_page"] = bib["journal"]["start_page"]
            del bib["journal"]["start_page"]
        if "journal" in bib and "end_page" in bib.get("journal", {}):
            bib["end_page"] = bib["journal"]["end_page"]
            del bib["journal"]["end_page"]

        if existing is None:
            return models.Article(**record)
        merged = dataobj.merge_outside_construct(self._struct, record, existing.data)
        return models.Article(**merged)
예제 #19
0
    def test_09_article(self):
        """Test that the OAI-PMH article feed returns records and only displays articles accepted in DOAJ."""
        # NOTE(review): the pissn values below end with a stray comma inside the
        # string ('5678-5678,' / '8765-8765,') — looks like a typo in the fixture
        # call, preserved here to keep behaviour identical; confirm with the fixture.
        article_source = ArticleFixtureFactory.make_article_source(eissn='1234-1234', pissn='5678-5678,', in_doaj=False)
        a_private = models.Article(**article_source)
        ba = a_private.bibjson()
        ba.title = "Private Article"
        a_private.save(blocking=True)

        article_source = ArticleFixtureFactory.make_article_source(eissn='4321-4321', pissn='8765-8765,', in_doaj=True)
        a_public = models.Article(**article_source)
        ba = a_public.bibjson()
        ba.title = "Public Article"
        a_public.save(blocking=True)

        # allow the index time to catch up
        time.sleep(1)

        with self.app_test.test_request_context():
            with self.app_test.test_client() as t_client:
                resp = t_client.get(url_for('oaipmh.oaipmh',  specified='article', verb='ListRecords', metadataPrefix='oai_dc'))
                assert resp.status_code == 200

                t = etree.fromstring(resp.data)
                records = t.xpath('/oai:OAI-PMH/oai:ListRecords', namespaces=self.oai_ns)

                # Check we only have one record returned (the in_doaj article)
                r = records[0].xpath('//oai:record', namespaces=self.oai_ns)
                assert len(r) == 1

                # Check the orcid_id xwalk and that the public article was returned
                assert str(records[0].xpath('//dc:creator/@id', namespaces=self.oai_ns)[0]) == a_public.bibjson().author[0].get("orcid_id")
                assert records[0].xpath('//dc:title', namespaces=self.oai_ns)[0].text == a_public.bibjson().title

                resp = t_client.get(url_for('oaipmh.oaipmh',  specified='article', verb='GetRecord', metadataPrefix='oai_dc') + '&identifier=abcdefghijk_article')
                assert resp.status_code == 200

                t = etree.fromstring(resp.data)
                records = t.xpath('/oai:OAI-PMH/oai:GetRecord', namespaces=self.oai_ns)

                # Check we only have one record returned
                r = records[0].xpath('//oai:record', namespaces=self.oai_ns)
                assert len(r) == 1

                # Check we have the correct article
                assert records[0].xpath('//dc:title', namespaces=self.oai_ns)[0].text == a_public.bibjson().title
예제 #20
0
파일: toc.py 프로젝트: kelly-mcdougall/doaj
def minimise_article(full_article):
    """Return a slimmed-down copy of *full_article* for embedding in a ToC.

    Keeps only the article id and a reduced bibjson: ISSNs, journal
    metadata, subject classifications, and the year/month are stripped
    because they are held elsewhere in the ToC structure.

    :param full_article: the complete article model object
    :return: a new ``models.Article`` containing just the id and the
        minimised bibjson (the original article is not modified)
    """
    # don't shadow the builtin `id`; deepcopy so the source article's
    # bibjson is left untouched
    article_id = full_article.id
    bibjson = deepcopy(full_article.bibjson())

    # remove the issns from the bibjson
    bibjson.remove_identifiers(idtype=bibjson.P_ISSN)
    bibjson.remove_identifiers(idtype=bibjson.E_ISSN)

    # remove all the journal metadata
    bibjson.remove_journal_metadata()

    # remove all the subject classifications
    bibjson.remove_subjects()

    # remove the year and the month (they are held elsewhere in this case)
    del bibjson.month
    del bibjson.year

    # create a minimised version of the article
    minimised = models.Article()
    minimised.set_id(article_id)
    minimised.set_bibjson(bibjson)

    return minimised
예제 #21
0
    def setUp(self):
        """Seed the index with journals (each with articles) and push a managing-editor context."""
        super(TestTaskJournalBulkDelete, self).setUp()

        self.journals = []
        self.articles = []
        sources = JournalFixtureFactory.make_many_journal_sources(
            count=TEST_JOURNAL_COUNT)
        for source in sources:
            journal = models.Journal(**source)
            self.journals.append(journal)
            journal.save()
            # each journal gets its own set of articles, matched by ISSN
            jbib = journal.bibjson()
            for _ in range(TEST_ARTICLES_PER_JOURNAL):
                article = models.Article(**ArticleFixtureFactory.make_article_source(
                    with_id=False,
                    eissn=jbib.first_eissn,
                    pissn=jbib.first_pissn))
                article.save()
                self.articles.append(article)

        sleep(2)

        # account roles which must NOT be allowed to run the bulk delete
        self.forbidden_accounts = [
            make_source()['id']
            for make_source in (AccountFixtureFactory.make_editor_source,
                                AccountFixtureFactory.make_assed1_source,
                                AccountFixtureFactory.make_assed2_source,
                                AccountFixtureFactory.make_assed3_source)
        ]

        self._make_and_push_test_context(acc=models.Account(
            **AccountFixtureFactory.make_managing_editor_source()))
예제 #22
0
    def test_07_retrieve_article_fail(self):
        """Retrieving a not-in-doaj article must 401 anonymously and 404 for the wrong/any other user."""
        # fixture: an owned journal, plus an article which is NOT in doaj
        owner = models.Account()
        owner.set_id('test')
        owner.set_name("Tester")
        owner.set_email("*****@*****.**")
        journal = models.Journal(**JournalFixtureFactory.make_journal_source(
            in_doaj=True))
        journal.set_owner(owner.id)
        journal.save()
        time.sleep(1)

        source = ArticleFixtureFactory.make_article_source()
        source['admin']['in_doaj'] = False
        article = models.Article(**source)
        article.save()
        time.sleep(1)

        # anonymous retrieval of a withdrawn article -> 401
        with self.assertRaises(Api401Error):
            ArticlesCrudApi.retrieve(article.id, None)

        # a user who does not own the journal -> 404
        wrong_user = models.Account()
        wrong_user.set_id("asdklfjaioefwe")
        with self.assertRaises(Api404Error):
            ArticlesCrudApi.retrieve(article.id, wrong_user)

        # a non-existent article id -> 404 regardless of account
        other = models.Account()
        other.set_id(article.id)
        with self.assertRaises(Api404Error):
            ArticlesCrudApi.retrieve("ijsidfawefwefw", other)
예제 #23
0
    def test_06_crossref_article_xml_xwalk(self):
        """Crosswalk a correct 2-ISSN Crossref upload and check the resulting bibjson fields."""
        handle = CrossrefArticleFixtureFactory.upload_2_issns_correct()
        records = CrossrefXWalk().crosswalk_file(file_handle=handle, add_journal_info=False)
        bibjson = models.Article(**records[0]).bibjson()

        # hoist the repeated lookups into locals so each assertion reads cleanly
        pissn = bibjson.get_one_identifier(bibjson.P_ISSN)
        eissn = bibjson.get_one_identifier(bibjson.E_ISSN)
        fulltext = bibjson.get_single_url("fulltext")

        assert bibjson.journal_title == "2 ISSNs Correct", "expected '2 ISSNs Correct', received: {}".format(bibjson.journal_title)
        assert pissn == "1234-5678", "expected '1234-5678', received: {}".format(pissn)
        assert eissn == "9876-5432", "expected '9876-5432', received: {}".format(eissn)
        assert bibjson.year == "2004", "expected '2004', received: {}".format(bibjson.year)
        assert bibjson.title == "Article 12292005 9:32", "expected 'Article 12292005 9:32', received: {}".format(bibjson.title)
        assert bibjson.author == [{
            'name': 'Surname, Bob'
        }], "expected [{{'name': 'Surname, Bob'}}]', received: {}".format(bibjson.author)
        assert fulltext == "http://www.crossref.org/", "expected 'http://www.crossref.org/', received: {}".format(fulltext)
예제 #24
0
    def test_05_doaj_article_xml_xwalk(self):
        """Crosswalk a correct 2-ISSN DOAJ XML upload and check the resulting bibjson fields."""
        handle = DoajXmlArticleFixtureFactory.upload_2_issns_correct()
        records = DOAJXWalk().crosswalk_file(file_handle=handle, add_journal_info=False)
        bibjson = models.Article(**records[0]).bibjson()

        # hoist the repeated lookups into locals so each assertion reads cleanly
        pissn = bibjson.get_one_identifier(bibjson.P_ISSN)
        eissn = bibjson.get_one_identifier(bibjson.E_ISSN)
        fulltext = bibjson.get_single_url("fulltext")

        assert bibjson.journal_language == [
            "fre"
        ], "expected ['fre'], actual: {} ".format(bibjson.journal_language)
        assert bibjson.publisher == "Codicille éditeur et CRILCQ", "expected 'Codicille éditeur et CRILCQ', actual: {} ".format(bibjson.publisher)
        assert bibjson.journal_title == "2 ISSNs Correct", "expected '2 ISSNs Correct', received: {}".format(bibjson.journal_title)
        assert pissn == "1234-5678", "expected '1234-5678', received: {}".format(pissn)
        assert eissn == "9876-5432", "expected '9876-5432', received: {}".format(eissn)
        assert bibjson.year == "2013", "expected '2013', received: {}".format(bibjson.year)
        assert bibjson.title == "Imaginaires autochtones contemporains. Introduction", "expected 'Imaginaires autochtones contemporains. Introduction', received: {}".format(bibjson.title)
        assert bibjson.author == [{
            'name': 'Papillon, Joëlle'
        }], "expected [{{'name': 'Papillon, Joëlle'}}]', received: {}".format(bibjson.author)
        assert fulltext == "http://doaj.org/testing/url.pdf", "expected 'http://doaj.org/testing/url.pdf', received: {}".format(fulltext)
예제 #25
0
    def test_01_withdraw_task(self):
        """Withdrawing journals via the background task also withdraws their articles."""
        journal_ids = []
        article_ids = []
        for source in JournalFixtureFactory.make_many_journal_sources(10, in_doaj=True):
            journal = models.Journal(**source)
            journal.save()
            journal_ids.append(journal.id)

            # one article per journal, matched on the journal's ISSNs
            jbib = journal.bibjson()
            article = models.Article(**ArticleFixtureFactory.make_article_source(
                pissn=jbib.get_identifiers(jbib.P_ISSN)[0],
                eissn=jbib.get_identifiers(jbib.E_ISSN)[0],
                with_id=False))
            article.save()
            article_ids.append(article.id)

        time.sleep(2)

        job = SetInDOAJBackgroundTask.prepare("testuser",
                                              journal_ids=journal_ids,
                                              in_doaj=False)
        SetInDOAJBackgroundTask.submit(job)

        time.sleep(2)

        # every journal - and every article belonging to one - is now withdrawn
        for jid in journal_ids:
            assert models.Journal.pull(jid).is_in_doaj() is False

        for aid in article_ids:
            assert models.Article.pull(aid).is_in_doaj() is False
예제 #26
0
    def test_14_article_model_index(self):
        """The article's 'index' section should exist only after prep() is called."""
        article = models.Article(**ArticleFixtureFactory.make_article_source())
        assert article.data.get('index') is None

        # prep() is responsible for generating the index section
        article.prep()
        assert article.data.get('index') is not None
예제 #27
0
    def test_33_article_stats(self):
        """article_stats counts only in-doaj articles for the journal's ISSN."""
        # two articles per (issn, in_doaj) combination, with sequential
        # created dates 2019-01-01 .. 2019-01-08 in this order
        combos = [
            ("1111-1111", True),
            ("1111-1111", False),
            ("2222-2222", True),
            ("2222-2222", False),
        ]
        articles = []
        day = 1
        for issn, in_doaj in combos:
            for _ in range(2):
                article = models.Article(
                    **ArticleFixtureFactory.make_article_source(eissn=issn,
                                                                pissn=issn,
                                                                with_id=False,
                                                                in_doaj=in_doaj))
                article.set_created("2019-01-0" + str(day) + "T00:00:00Z")
                articles.append(article)
                day += 1

        # block only on the final save so the whole batch is indexed
        last = len(articles) - 1
        for pos, article in enumerate(articles):
            article.save(blocking=pos == last)

        journal = models.Journal()
        bj = journal.bibjson()
        bj.add_identifier(bj.P_ISSN, "1111-1111")
        stats = journal.article_stats()
        # only the two in-doaj articles for 1111-1111 are counted
        assert stats.get("total") == 2
        assert stats.get("latest") == "2019-01-02T00:00:00Z"
예제 #28
0
    def test_10_journal_deletes(self):
        """Exercise the key methods involved in journal deletes (with article cascade)."""
        # seed the index with five journals, each with a distinct ISSN
        for n in range(5):
            journal = models.Journal()
            journal.set_in_doaj(True)
            jbib = journal.bibjson()
            jbib.title = "Test Journal {x}".format(x=n)
            jbib.add_identifier(jbib.P_ISSN, "{x}000-0000".format(x=n))
            jbib.publisher = "Test Publisher {x}".format(x=n)
            jbib.add_url("http://homepage.com/{x}".format(x=n), "homepage")
            journal.save()

            # stagger the last-updated dates
            time.sleep(0.66)

        # and five articles sharing those ISSNs
        for n in range(5):
            article = models.Article()
            article.set_in_doaj(True)
            abib = article.bibjson()
            abib.title = "Test Article {x}".format(x=n)
            abib.add_identifier(abib.P_ISSN, "{x}000-0000".format(x=n))
            abib.publisher = "Test Publisher {x}".format(x=n)
            article.save()

            # stagger the last-updated dates
            time.sleep(0.66)

        # target exactly one journal by title
        query = {
            "query": {
                "bool": {
                    "must": [{
                        "term": {
                            "bibjson.title.exact": "Test Journal 1"
                        }
                    }]
                }
            }
        }
        assert models.Journal.hit_count(query) == 1

        issns = models.Journal.issns_by_query(query)
        assert len(issns) == 1
        assert "1000-0000" in issns

        # delete the journal and cascade to its articles
        models.Journal.delete_selected(query, articles=True)
        time.sleep(1)

        assert len(models.Article.all()) == 4
        assert len(self.list_today_article_history_files()) == 1

        assert len(models.Journal.all()) == 4
        assert len(self.list_today_journal_history_files()
                   ) == 6  # Because all journals are snapshot at create time
예제 #29
0
    def test_02_toc_requirements(self):
        """The prepped article model must carry everything ToC pages need."""
        article = models.Article(**ArticleFixtureFactory.make_article_source())
        article.prep()

        # ToCs require a volume, an issue number, and a usable date
        data = article.data
        assert data['bibjson']['journal']['volume'] == '1'
        assert data['bibjson']['journal']['number'] == '99'
        assert data['index']['date'] == "1991-01-01T00:00:00Z"
        assert data['index']['date_toc_fv_month'] == data['index']['date'] == "1991-01-01T00:00:00Z"
예제 #30
0
    def test_06_merge_replaces_metadata(self):
        """Merging replaces the metadata source of a new article but adopts the old article's id."""

        ftu = "http://www.sbe.deu.edu.tr/dergi/cilt15.say%C4%B12/06%20AKALIN.pdf"

        def build(article_id, title, abstract=None):
            # local helper: construct an article sharing the fulltext url
            article = models.Article()
            if article_id is not None:
                article.set_id(article_id)
            bib = article.bibjson()
            bib.title = title
            if abstract is not None:
                bib.abstract = abstract
            bib.add_url(ftu, urltype="fulltext")
            return article

        id1 = uuid.uuid4().hex
        id2 = uuid.uuid4().hex
        assert id1 != id2

        older = build(id1, "Example A article with a fulltext url",
                      abstract="a bunch of text")
        newer = build(id2, "Example B article with a fulltext url")

        # the merge adopts the older record's id but keeps the newer metadata
        assert newer.id == id2
        newer.merge(older)
        assert newer.id == id1
        assert newer.bibjson().title == "Example B article with a fulltext url"
        assert newer.bibjson().abstract is None

        # an article created without an explicit id also inherits the merged id
        newest = build(None, "Example C article with a fulltext url",
                       abstract="a newer bunch of text")
        newest.merge(newer)

        assert newest.id == newer.id == older.id
        assert newest.bibjson().title == "Example C article with a fulltext url"
        assert newest.bibjson().abstract == "a newer bunch of text"