Esempio n. 1
0
File: openurl.py Progetto: DOAJ/doaj
    def query_for_vol(self, journalobj):
        """Run a terms query for articles in this journal's volume (and issue, when given).

        Returns the DAO query result, or None when the journal has no ISSNs to search by.
        """
        # The journal object will already be the correct continuation, if the user provided sufficient detail.
        issns = journalobj.bibjson().issns()

        # Without ISSNs there is no way to scope the search - give up early.
        if issns is None:
            return None

        q = deepcopy(TERMS_SEARCH)
        q["size"] = 0
        musts = q["query"]["bool"]["must"]

        musts.append({"terms": {"index.issn.exact": issns}})
        musts.append({"term": {"bibjson.journal.volume.exact": self.volume}})

        # An issue number only makes sense in combination with a volume, so it is
        # added as an extra constraint rather than queried on its own.
        if self.issue:
            musts.append({"term": {"bibjson.journal.number.exact": self.issue}})

        app.logger.debug("OpenURL subsequent volume query to article: " + json.dumps(q))
        return Article.query(q=q)
Esempio n. 2
0
def wipe_emails(connection, batch_size=500):
    """Strip email addresses from every article matched by HAS_EMAIL_QUERY.

    Scrolls through all matching articles, prunes emails via the DataObj
    construct (construct_silent_prune), and re-indexes the cleaned records
    in bulk batches of ``batch_size``.

    :param connection: esprit connection to the index
    :param batch_size: number of article records to accumulate per bulk write
    """
    batch = []

    for a in esprit.tasks.scroll(connection, 'article', q=HAS_EMAIL_QUERY):
        # Create the article model
        article = Article(**a)
        # Use the DataObj prune to remove emails
        _ = article.bibjson(construct_silent_prune=True)
        batch.append(article.data)

        if len(batch) >= batch_size:
            esprit.raw.bulk(connection, 'article', batch, idkey='id')
            batch = []

    # Save the final partial batch.  Skip the bulk call entirely when the batch
    # is empty - Elasticsearch rejects a bulk request with no actions.
    if batch:
        esprit.raw.bulk(connection, 'article', batch, idkey='id')
Esempio n. 3
0
File: journal.py Progetto: DOAJ/doaj
    def delete_selected(cls, query, articles=False, snapshot_journals=True, snapshot_articles=True):
        """Delete every journal matched by the query, optionally deleting their
        articles and taking snapshots of journals/articles before removal."""
        if articles:
            # find all the ISSNs belonging to the matched journals
            issns = cls.issns_by_query(query)

            # remove every article published under those ISSNs
            from portality.models import Article
            Article.delete_by_issns(issns, snapshot=snapshot_articles)

        # record a snapshot of each journal before it is removed
        if snapshot_journals:
            for journal in cls.iterate(query, page_size=1000):
                journal.snapshot()

        # finally remove the journal records themselves
        cls.delete_by_query(query)
Esempio n. 4
0
File: journal.py Progetto: DOAJ/doaj
 def article_stats(self):
     """Return article statistics for this journal: the total count and the
     created date of the most recent article (None when there are none)."""
     from portality.models import Article
     result = Article.query(q=ArticleStatsQuery(self.known_issns()).query())
     hits = result.get("hits", {})
     count = hits.get("total", 0)
     newest = None
     if count > 0:
         # the query is assumed to sort newest-first; take the top hit's created date
         newest = hits.get("hits", [])[0].get("_source").get("created_date")
     return {
         "total" : count,
         "latest" : newest
     }
Esempio n. 5
0
    def query_es(self):
        """
        Query Elasticsearch for a set of matches for this request.
        :return: The results of a query through the dao, a JSON object.
        """
        # Start from a fresh copy of the query template and fill in the terms.
        query = deepcopy(TERMS_SEARCH)

        # Collect every schema attribute that carries a truthy value on this object.
        present = [(key, getattr(self, key))
                   for key in JOURNAL_SCHEMA_KEYS[:-1]
                   if getattr(self, key)]

        # Default the genre to the first supported one when absent.
        # FIXME: is it correct to assume journal?  TODO: we may want to handle 404 instead
        if not self.genre:
            self.genre = SUPPORTED_GENRES[0]

        # Index selecting the journal or article column of the field mapping.
        idx = SUPPORTED_GENRES.index(getattr(self, 'genre').lower())

        # Translate each attribute to its ES field and append a term clause;
        # attributes with no mapping for this genre are skipped.
        for (key, val) in present:
            field = OPENURL_TO_ES[key][idx]
            if field is None:
                continue
            query["query"]["bool"]["must"].append({"term": {field: val}})

        # Don't run a query with no search terms at all.
        if not query["query"]["bool"]["must"]:
            app.logger.debug("No valid search terms in OpenURL object")
            return None

        # Dispatch to the journal or article DAO as appropriate.
        if idx == 0:
            app.logger.debug("OpenURL query to journal: " + json.dumps(query))
            return Journal.query(q=query)
        elif idx == 1:
            app.logger.debug("OpenURL query to article: " + json.dumps(query))
            return Article.query(q=query)
Esempio n. 6
0
    def setUp(self):
        """Build the shared fixtures: a publisher and an admin account, two
        journals (journal1 owned by the publisher), and three saved articles."""
        super(TestCreateOrUpdateArticle, self).setUp()

        # accounts: one publisher, one administrator
        self.publisher = Account()
        self.publisher.add_role("publisher")
        self.publisher.save(blocking=True)

        self.admin = Account()
        self.admin.add_role("admin")
        self.admin.save(blocking=True)

        # journal1 belongs to the publisher; journal2 has no owner
        sources = JournalFixtureFactory.make_many_journal_sources(2, True)

        self.journal1 = Journal(**sources[0])
        self.journal1.set_owner(self.publisher.id)
        bib1 = self.journal1.bibjson()
        bib1.add_identifier(bib1.P_ISSN, "1111-1111")
        bib1.add_identifier(bib1.E_ISSN, "2222-2222")
        self.journal1.save(blocking=True)

        self.publisher.add_journal(self.journal1)

        self.journal2 = Journal(**sources[1])
        bib2 = self.journal2.bibjson()
        bib2.add_identifier(bib2.P_ISSN, "1234-5678")
        bib2.add_identifier(bib2.E_ISSN, "9876-5432")
        self.journal2.save(blocking=True)

        # articles 10 and 11 live in journal1; article 2 lives in journal2
        self.article10 = Article(**ArticleFixtureFactory.make_article_source(
            pissn="1111-1111", eissn="2222-2222",
            doi="10.0000/article-10", fulltext="https://www.article10.com"))
        self.article10.set_id("articleid10")
        self.article10.save(blocking=True)

        self.article11 = Article(**ArticleFixtureFactory.make_article_source(
            pissn="1111-1111", eissn="2222-2222",
            doi="10.0000/article-11", fulltext="https://www.article11.com"))
        self.article11.set_id("articleid11")
        self.article11.save(blocking=True)

        self.article2 = Article(**ArticleFixtureFactory.make_article_source(
            pissn="1234-5678", eissn="9876-5432",
            doi="10.0000/article-2", fulltext="https://www.article2.com"))
        self.article2.set_id("articleid2")
        self.article2.save(blocking=True)
Esempio n. 7
0
    def test_04_old_doi_new_url(self):
        """An update keeping the DOI but changing the fulltext URL should merge
        into the existing article, which then carries the new normalised URL."""
        ba = self.article10.bibjson()
        ba.remove_urls(ba.FULLTEXT)
        ba.add_url("https://updated.com", ba.FULLTEXT)

        # try as an admin
        resp = ArticleService.create_article(
            self=ArticleService(),
            account=self.admin,
            article=self.article10,
            update_article_id=self.article10.id)

        # Failure messages now describe the actual expectation of each assertion.
        assert resp["success"] == 1, "expected 1 success, received: {}".format(resp)
        assert resp["update"] == 1, "expected 1 updated, received: {}".format(resp)
        assert resp["new"] == 0, "expected 0 new, received: {}".format(resp)

        # Report the value of the *pulled* record on failure, not the stale
        # in-memory self.article10 that was being formatted before.
        art = Article.pull(self.article10.id)
        assert art.get_normalised_fulltext() == "//updated.com", \
            "expected //updated.com, received: {}".format(art.get_normalised_fulltext())
Esempio n. 8
0
File: openurl.py Progetto: DOAJ/doaj
    def query_es(self):
        """
        Query Elasticsearch for a set of matches for this request.
        :return: The results of a query through the dao, a JSON object.
        """
        # Copy to the template, which will be populated with terms
        populated_query = deepcopy(TERMS_SEARCH)

        # Get all of the attributes with values set.
        set_attributes = [(x, getattr(self, x)) for x in JOURNAL_SCHEMA_KEYS[:-1] if getattr(self, x)]

        # If we don't have a genre, guess journal FIXME: is it correct to assume journal?
        if not self.genre:
            self.genre = SUPPORTED_GENRES[0]    # TODO: we may want to handle 404 instead

        # Set i to use either our mapping for journals or articles
        i = SUPPORTED_GENRES.index(getattr(self, 'genre').lower())

        # Add the attributes to the query
        for (k, v) in set_attributes:
            es_term = OPENURL_TO_ES[k][i]
            if es_term is None:
                continue
            else:
                term = {"term": {es_term: v}}
            populated_query["query"]["bool"]["must"].append(term)

        # avoid doing an empty query
        if len(populated_query["query"]["bool"]["must"]) == 0:
            app.logger.debug("No valid search terms in OpenURL object")
            return None

        # Return the results of the query
        if i == 0:
            app.logger.debug("OpenURL query to journal: " + json.dumps(populated_query))
            return Journal.query(q=populated_query)
        elif i == 1:
            app.logger.debug("OpenURL query to article: " + json.dumps(populated_query))
            return Article.query(q=populated_query)
Esempio n. 9
0
    def test_00_no_doi_and_url_changed(self):
        """An in-place update (same DOI and URL, new title) should count as an
        update, not a new article, and the stored record gets the new title."""
        ba = self.article10.bibjson()
        ba.title = "Updated Article"

        # try for admin
        resp = ArticleService.create_article(
            self=ArticleService(),
            account=self.admin,
            article=self.article10,
            update_article_id=self.article10.id)

        # Failure messages now match what each assertion actually expects
        # (the originals said "expected 1 updated" for all three checks).
        assert resp["success"] == 1, "expected 1 success, received: {}".format(resp)
        assert resp["update"] == 1, "expected 1 updated, received: {}".format(resp)
        assert resp["new"] == 0, "expected 0 new, received: {}".format(resp)

        a = Article.pull(self.article10.id)
        assert a.bibjson().title == "Updated Article", "Expected `Updated Article`, received: {}" \
            .format(a.bibjson().title)
Esempio n. 10
0
    def test_05_new_doi_old_url(self):
        """An update keeping the fulltext URL but changing the DOI should merge
        into the existing article, which then carries the new normalised DOI."""
        ba = self.article10.bibjson()
        ba.remove_identifiers(ba.DOI)
        ba.add_identifier(ba.DOI, "10.0000/article-UPDATED")

        # try as an admin
        resp = ArticleService.create_article(
            self=ArticleService(),
            account=self.admin,
            article=self.article10,
            update_article_id=self.article10.id)

        # Failure messages now match what each assertion actually expects.
        assert resp["success"] == 1, "expected 1 success, received: {}".format(resp)
        assert resp["update"] == 1, "expected 1 updated, received: {}".format(resp)
        assert resp["new"] == 0, "expected 0 new, received: {}".format(resp)

        # Report the value of the *pulled* record on failure, not the stale
        # in-memory self.article10 that was being formatted before.
        art = Article.pull(self.article10.id)
        assert art.get_normalised_doi() == "10.0000/article-UPDATED", \
            "expected 10.0000/article-UPDATED, received: {}".format(art.get_normalised_doi())
    def test_is_acceptable(self, value, kwargs):
        """Parameterised check that is_acceptable passes or raises depending on
        whether the article carries a DOI and/or a fulltext URL."""
        is_acceptable = kwargs.get("is_acceptable") == "yes"
        doi = "10.1234/article-10" if kwargs.get("doi") == "exists" else None
        ft = "https://example.com" if kwargs.get("fulltext_url") == "exists" else None

        article = Article(**ArticleFixtureFactory.make_article_source())

        # strip whatever the parameters say should be absent (the fixture
        # provides both a DOI and a fulltext URL by default)
        if doi is None:
            article.bibjson().remove_identifiers("doi")
        if ft is None:
            article.bibjson().remove_urls("fulltext")

        if is_acceptable:
            # an acceptable article produces no return value and no exception
            self.assertIsNone(self.svc.is_acceptable(article))
        else:
            with self.assertRaises(exceptions.ArticleNotAcceptable):
                self.svc.is_acceptable(article)
Esempio n. 12
0
    def test_01_is_legitimate_owner(self, name, kwargs):
        """Parameterised test of ArticleService.is_legitimate_owner.

        The kwargs matrix controls: whether an article/owner exists, which
        ISSNs the article carries, which of those ISSNs the index appears to
        have seen, who owns the matching journals, and whether the call is
        expected to raise or to return a specific legitimacy verdict.
        """
        article_arg = kwargs.get("article")
        owner_arg = kwargs.get("owner")
        article_eissn_arg = kwargs.get("article_eissn")
        article_pissn_arg = kwargs.get("article_pissn")
        seen_eissn_arg = kwargs.get("seen_eissn")
        seen_pissn_arg = kwargs.get("seen_pissn")
        journal_owner_arg = kwargs.get("journal_owner")

        raises_arg = kwargs.get("raises")
        legit_arg = kwargs.get("legit")

        # map the parameter string to an exception class (None = no raise expected)
        raises = EXCEPTIONS.get(raises_arg)

        ###############################################
        ## set up

        owner = None
        if owner_arg != "none":
            owner = Account(**AccountFixtureFactory.make_publisher_source())

        owner_id = None
        if owner is not None:
            owner_id = owner.id

        # generate our incoming article; the fixture's default ISSNs are
        # stripped and replaced only when the parameters ask for them
        article = None
        eissn = None
        pissn = None
        if article_arg == "exists":
            source = ArticleFixtureFactory.make_article_source()
            article = Article(**source)
            article.set_id()

            article.bibjson().remove_identifiers("pissn")
            if article_pissn_arg == "yes":
                pissn = "1234-5678"
                article.bibjson().add_identifier("pissn", pissn)

            article.bibjson().remove_identifiers("eissn")
            if article_eissn_arg == "yes":
                eissn = "9876-5432"
                article.bibjson().add_identifier("eissn", eissn)

        # assemble the issns that will appear to be in the index.  One that is irrelevant, and just
        # serves to be "noise" in the database, and the other that matches the spec required by
        # the test
        issns = [("1111-1111", "2222-2222")]
        if eissn is not None and pissn is not None and seen_eissn_arg == "yes" and seen_pissn_arg == "yes":
            issns.append((eissn, pissn))
        if eissn is not None and seen_eissn_arg == "yes":
            issns.append((eissn, None))
        if pissn is not None and seen_pissn_arg == "yes":
            issns.append((None, pissn))

        # the owner(s) the mocked journal lookup will report for those ISSNs
        owners = []
        if journal_owner_arg == "none":
            owners = [None]
        elif journal_owner_arg == "correct" and owner_id is not None:
            owners = [owner_id]
        elif journal_owner_arg == "incorrect":
            owners = ["randomowner"]
        elif journal_owner_arg == "mix" and owner_id is not None:
            owners.append(owner_id)
            owners.append("randomowner")
            owners.append(None)

        # NOTE(review): this monkeypatches the class attribute globally and does
        # not restore it - presumably the test harness tears it down; confirm.
        mock = ModelJournalMockFactory.find_by_issn(issns, owners)
        Journal.find_by_issn = mock

        ###########################################################
        # Execution

        svc = DOAJ.articleService()

        if raises is not None:
            with self.assertRaises(raises):
                svc.is_legitimate_owner(article, owner_id)
        else:
            legit = svc.is_legitimate_owner(article, owner_id)

            # verify the verdict requested by the parameter matrix
            if legit_arg == "no":
                assert legit is False
            elif legit_arg == "yes":
                assert legit is True
Esempio n. 13
0
    def test_01_discover_duplicates(self, name, kwargs):
        """Parameterised test of ArticleService.discover_duplicates.

        Seeds the index with articles whose DOIs/fulltext URLs are variously
        padded, prefixed or scheme-swapped, then builds an incoming article per
        the kwargs matrix and checks that duplicates are discovered (or not)
        by DOI and by fulltext URL, or that the expected exception is raised.
        """
        article_arg = kwargs.get("article")
        owner_arg = kwargs.get("owner")
        article_doi_arg = kwargs.get("article_doi")
        doi_duplicate_arg = kwargs.get("doi_duplicate")
        article_fulltext_arg = kwargs.get("article_fulltext")
        fulltext_duplicate_arg = kwargs.get("fulltext_duplicate")
        articles_by_doi_arg = kwargs.get("articles_by_doi")
        articles_by_fulltext_arg = kwargs.get("articles_by_fulltext")
        raises_arg = kwargs.get("raises")

        # map the parameter string to an exception class (None = no raise expected)
        raises = EXCEPTIONS.get(raises_arg)

        ###############################################
        ## set up

        owner = None
        if owner_arg != "none":
            owner = Account(**AccountFixtureFactory.make_publisher_source())

        owner_id = None
        if owner is not None:
            owner_id = owner.id

        # create a journal for the owner
        if owner_arg not in ["none"]:
            source = JournalFixtureFactory.make_journal_source(in_doaj=True)
            journal = Journal(**source)
            journal.set_owner(owner.id)
            journal.bibjson().remove_identifiers()
            journal.bibjson().add_identifier("eissn", "1234-5678")
            journal.bibjson().add_identifier("pissn", "9876-5432")
            journal.save()

        # determine what we need to load into the index; each seeded article's
        # identifiers are distorted per the *_duplicate_arg so the service's
        # normalisation is exercised
        article_ids = []
        aids_block = []
        if owner_arg not in ["none", "no_articles"]:
            for i, ident in enumerate(IDENTS):
                the_doi = ident["doi"]
                if doi_duplicate_arg == "padded":
                    the_doi = "  " + the_doi + "  "
                elif doi_duplicate_arg == "prefixed":
                    the_doi = "https://dx.doi.org/" + the_doi

                the_fulltext = ident["fulltext"]
                if article_fulltext_arg != "invalid":
                    if fulltext_duplicate_arg == "padded":
                        the_fulltext = "  http:" + the_fulltext
                    elif fulltext_duplicate_arg == "http":
                        the_fulltext = "http:" + the_fulltext
                    elif fulltext_duplicate_arg == "https":
                        the_fulltext = "https:" + the_fulltext
                    else:
                        the_fulltext = "http:" + the_fulltext

                source = ArticleFixtureFactory.make_article_source(
                    eissn="1234-5678",
                    pissn="9876-5432",
                    doi=the_doi,
                    fulltext=the_fulltext)
                article = Article(**source)
                article.set_id()
                article.save(blocking=True)
                article_ids.append(article.id)
                aids_block.append((article.id, article.last_updated))

        # generate our incoming article
        article = None
        doi = None
        fulltext = None
        if article_arg == "yes":
            # NOTE(review): "1234=5678" looks like a typo for "1234-5678", but the
            # comment says ISSN matches are irrelevant to this test, so behaviour
            # is unaffected - confirm upstream before "fixing".
            eissn = "1234=5678"  # one matching
            pissn = "6789-1234"  # the other not - issn matches are not relevant to this test

            if article_doi_arg in ["yes", "padded"]:
                doi = "10.1234/abc/11"
                if doi_duplicate_arg in ["yes", "padded"]:
                    doi = IDENTS[0]["doi"]
                if article_doi_arg == "padded":
                    doi = "  doi:" + doi + "  "
            elif article_doi_arg in ["invalid"]:
                doi = IDENTS[-1]["doi"]

            if article_fulltext_arg in ["yes", "padded", "https"]:
                fulltext = "//example.com/11"
                if fulltext_duplicate_arg in ["yes", "padded", "https"]:
                    fulltext = IDENTS[0]["fulltext"]
                if fulltext_duplicate_arg == "padded":
                    fulltext = "  http:" + fulltext + "  "
                elif fulltext_duplicate_arg == "https":
                    fulltext = "https:" + fulltext
                else:
                    fulltext = "http:" + fulltext
            elif article_fulltext_arg == "invalid":
                fulltext = IDENTS[-1]["fulltext"]

            source = ArticleFixtureFactory.make_article_source(
                eissn=eissn, pissn=pissn, doi=doi, fulltext=fulltext)
            article = Article(**source)

            # we need to do this if doi or fulltext are none, because the factory will set a default if we don't
            # provide them
            if doi is None:
                article.bibjson().remove_identifiers("doi")
            if fulltext is None:
                article.bibjson().remove_urls("fulltext")

            article.set_id()

        # wait until all the seeded articles are visible in the index
        Article.blockall(aids_block)

        ###########################################################
        # Execution

        svc = DOAJ.articleService()
        if raises is not None:
            with self.assertRaises(raises):
                svc.discover_duplicates(article)
        else:
            possible_articles = svc.discover_duplicates(article)

            if articles_by_doi_arg == "yes":
                assert "doi" in possible_articles
                assert len(possible_articles["doi"]) == 1
                # if this is the "invalid" doi, then we expect it to match the final article, otherwise match the first
                if article_doi_arg == "invalid":
                    assert possible_articles["doi"][0].id == article_ids[-1]
                else:
                    assert possible_articles["doi"][0].id == article_ids[0]
            else:
                if possible_articles is not None:
                    assert "doi" not in possible_articles

            if articles_by_fulltext_arg == "yes":
                assert "fulltext" in possible_articles
                assert len(possible_articles["fulltext"]) == 1
                # if this is the "invalid" fulltext url, then we expect it to match the final article, otherwise match the first
                if article_fulltext_arg == "invalid":
                    assert possible_articles["fulltext"][0].id == article_ids[
                        -1]
                else:
                    assert possible_articles["fulltext"][0].id == article_ids[
                        0]
            else:
                if possible_articles is not None:
                    assert "fulltext" not in possible_articles
Esempio n. 14
0
 def articles(self):
     """Return the articles stored on this object as a list of Article models."""
     raw_articles = self.data.get("articles", [])
     return [Article(**raw) for raw in raw_articles]
Esempio n. 15
0
def migrate_articles(source, batch_size=5000):
    # read in the content
    f = open(source)
    xml = etree.parse(f)
    f.close()
    articles = xml.getroot()
    print "migrating", str(len(articles)), "article records from", source
    
    batch = []
    for element in articles:
        a = Article()
        b = _to_article_bibjson(element)
        a.set_bibjson(b)
        a.set_created(_created_date(element))
        a.set_id()
        batch.append(a.data)
        
        if len(batch) >= batch_size:
            Article.bulk(batch, refresh=True)
            del batch[:]
    
    if len(batch) > 0:
        Article.bulk(batch)
    def test_01_get_duplicates(self, name, kwargs):
        """Parameterised test of get_duplicates and get_duplicate.

        discover_duplicates is mocked to return the requested numbers of DOI
        and fulltext duplicates (with a given overlap); the test checks the
        combined list is deduplicated, sorted newest-first, and that the
        singular get_duplicate either returns the top match, returns None, or
        raises ArticleMergeConflict when more than one distinct match exists.
        """
        article_arg = kwargs.get("article")
        owner_arg = kwargs.get("owner")
        doi_duplicates_arg = kwargs.get("doi_duplicates")
        fulltext_duplicates_arg = kwargs.get("fulltext_duplicates")
        overlap_arg = kwargs.get("overlap")
        raises_arg = kwargs.get("raises")

        # map the parameter string to an exception class (None = no raise expected)
        raises = EXCEPTIONS.get(raises_arg)

        # "-" means "not specified" and is encoded as -1
        doi_duplicates = -1
        if doi_duplicates_arg not in ["-"]:
            doi_duplicates = int(doi_duplicates_arg)

        fulltext_duplicates = -1
        if fulltext_duplicates_arg not in ["-"]:
            fulltext_duplicates = int(fulltext_duplicates_arg)

        overlap = -1
        if overlap_arg not in ["-"]:
            overlap = int(overlap_arg)

        # distinct duplicates = doi matches + fulltext matches - shared ones
        expected_count = doi_duplicates + fulltext_duplicates - overlap

        ###############################################
        ## set up

        owner = None
        if owner_arg != "no":
            owner = Account(**AccountFixtureFactory.make_publisher_source())

        owner_id = None
        if owner is not None:
            owner_id = owner.id

        # generate our incoming article
        article = None
        if article_arg == "yes":
            source = ArticleFixtureFactory.make_article_source()
            article = Article(**source)
            article.set_id()

        # replace discover_duplicates with a mock producing the requested counts
        mock = BLLArticleMockFactory.discover_duplicates(
            doi_duplicates, fulltext_duplicates, overlap)
        self.svc.discover_duplicates = mock

        # determine if we expect a merge conflict (more than one distinct duplicate)
        dds = 0 if doi_duplicates < 0 else doi_duplicates
        fds = 0 if fulltext_duplicates < 0 else fulltext_duplicates
        ol = 0 if overlap < 0 else overlap
        expect_merge_conflict = dds + fds - ol > 1

        ###########################################################
        # Execution

        first_article = None

        # first do get_duplicates
        if raises is not None:
            with self.assertRaises(raises):
                self.svc.get_duplicates(article)
        else:
            duplicates = self.svc.get_duplicates(article)

            if len(duplicates) > 0:
                first_article = duplicates[0]

            # check that we have the number of results we expected
            assert len(duplicates) == expected_count

            # check that the articles are unique in the list
            article_ids = [a.id for a in duplicates]
            article_ids.sort()
            deduped = list(set(article_ids))
            deduped.sort(
            )  # so it's comparable to the article_ids list, as the set() call destroys ordering
            assert article_ids == deduped  # i.e. that there were no duplicates

            # check that the articles are ordered by last_updated
            last_updateds = [
                datetime.strptime(a.last_updated, "%Y-%m-%dT%H:%M:%SZ")
                for a in duplicates
            ]
            sorted_lu = sorted(last_updateds, reverse=True)
            assert sorted_lu == last_updateds  # i.e. they were already sorted

        # then the same again on the singular get_duplicate
        if raises is not None:
            with self.assertRaises(raises):
                self.svc.get_duplicate(article)
        elif expect_merge_conflict:
            with self.assertRaises(exceptions.ArticleMergeConflict):
                self.svc.get_duplicate(article)
        else:
            duplicate = self.svc.get_duplicate(article)

            if expected_count > 0:
                # the singular form must return the newest duplicate
                assert isinstance(duplicate, Article)
                assert duplicate.id == first_article.id
            else:
                assert duplicate is None
Esempio n. 17
0
    def test_01_create_article(self, name, kwargs):
        """Parameterised test of ArticleService.create_article.

        The kwargs matrix controls the optional flags (duplicate_check,
        merge_duplicate, limit_to_account, add_journal_info, dry_run), the
        account's ownership status, and whether a duplicate exists; ownership
        and duplicate lookups are mocked.  Checks the report counts and which
        of the original/merged records ends up saved.
        """
        article_arg = kwargs.get("article")
        article_duplicate_arg = kwargs.get("article_duplicate")
        account_arg = kwargs.get("account")
        duplicate_check_arg = kwargs.get("duplicate_check")
        merge_duplicate_arg = kwargs.get("merge_duplicate")
        limit_to_account_arg = kwargs.get("limit_to_account")
        add_journal_info_arg = kwargs.get("add_journal_info")
        dry_run_arg = kwargs.get("dry_run")

        raises_arg = kwargs.get("raises")
        success_arg = kwargs.get("success")
        original_saved_arg = kwargs.get("original_saved")
        merge_saved_arg = kwargs.get("merge_saved")

        ###############################################
        ## set up

        success = int(success_arg)

        # each tri-state flag: "none" -> None, otherwise "true"/"false" -> bool
        duplicate_check = None
        if duplicate_check_arg != "none":
            duplicate_check = True if duplicate_check_arg == "true" else False

        merge_duplicate = None
        if merge_duplicate_arg != "none":
            merge_duplicate = True if merge_duplicate_arg == "true" else False

        limit_to_account = None
        if limit_to_account_arg != "none":
            limit_to_account = True if limit_to_account_arg == "true" else False

        add_journal_info = None
        if add_journal_info_arg != "none":
            add_journal_info = True if add_journal_info_arg == "true" else False

        dry_run = None
        if dry_run_arg != "none":
            dry_run = True if dry_run_arg == "true" else False

        # map the parameter string to an exception class (None = no raise expected)
        raises = EXCEPTIONS.get(raises_arg)

        eissn = "1234-5678"
        pissn = "9876-5432"

        # a journal is only needed when the service should copy its info onto the article
        if add_journal_info:
            jsource = JournalFixtureFactory.make_journal_source(in_doaj=True)
            j = Journal(**jsource)
            bj = j.bibjson()
            bj.title = "Add Journal Info Title"
            bj.remove_identifiers()
            bj.add_identifier(bj.P_ISSN, pissn)
            bj.add_identifier(bj.E_ISSN, eissn)
            j.save(blocking=True)

        article = None
        original_id = None
        if article_arg == "exists":
            source = ArticleFixtureFactory.make_article_source(eissn=eissn, pissn=pissn, doi="10.123/abc/1", fulltext="http://example.com/1")
            del source["bibjson"]["journal"]
            article = Article(**source)
            article.set_id()
            original_id = article.id

        account = None
        if account_arg != "none":
            source = AccountFixtureFactory.make_publisher_source()
            account = Account(**source)

        # mock the ownership check to match the account parameter
        legit = True if account_arg == "owner" else False
        ilo_mock = BLLArticleMockFactory.is_legitimate_owner(legit=legit)
        self.svc.is_legitimate_owner = ilo_mock

        # mock the ISSN ownership breakdown for the same scenario
        owned = [eissn, pissn] if account_arg == "owner" else []
        shared = []
        unowned = [eissn] if account_arg == "not_owner" else []
        unmatched = [pissn] if account_arg == "not_owner" else []
        ios_mock = BLLArticleMockFactory.issn_ownership_status(owned, shared, unowned, unmatched)
        self.svc.issn_ownership_status = ios_mock

        # mock the duplicate lookup: either a matching duplicate or none at all
        gd_mock = None
        if article_duplicate_arg == "yes":
            gd_mock = BLLArticleMockFactory.get_duplicate(eissn=eissn, pissn=pissn, doi="10.123/abc/1", fulltext="http://example.com/1")
        else:
            gd_mock = BLLArticleMockFactory.get_duplicate(return_none=True)
        self.svc.get_duplicate = gd_mock

        # capture what the service will consider the duplicate, for later assertions
        mock_article = self.svc.get_duplicate(article)

        ###########################################################
        # Execution

        if raises is not None:
            with self.assertRaises(raises):
                self.svc.create_article(article, account, duplicate_check, merge_duplicate,
                                        limit_to_account, add_journal_info, dry_run)
        else:
            report = self.svc.create_article(article, account, duplicate_check, merge_duplicate,
                                             limit_to_account, add_journal_info, dry_run)

            assert report["success"] == success

            # check that the article was saved and if it was saved that it was suitably merged
            if original_saved_arg == "yes":
                original = Article.pull(original_id)
                assert original is not None
                assert report["update"] == 0
            elif article is not None:
                original = Article.pull(original_id)
                assert original is None

            if merge_saved_arg == "yes":
                merged = Article.pull(mock_article.id)
                assert merged is not None
                assert report["update"] == 1
            elif mock_article is not None:
                merged = Article.pull(mock_article.id)
                assert merged is None

            # journal metadata should have been copied onto the article
            if add_journal_info:
                assert article.bibjson().journal_title == "Add Journal Info Title"
Esempio n. 18
0
class TestCreateOrUpdateArticle(DoajTestCase):
    """Tests for create-vs-update decisions in ArticleService.create_article.

    Each test varies whether the incoming article's DOI and fulltext URL are
    unchanged, brand new, or clash with identifiers already held by another
    article (owned either by the same publisher or by someone else), and
    checks the outcome both for a publisher account and for an admin account
    supplying an explicit update_article_id.
    """

    def setUp(self):
        super(TestCreateOrUpdateArticle, self).setUp()

        # a publisher account, which will own journal1 (and hence articles 10 and 11)
        self.publisher = Account()
        self.publisher.add_role("publisher")
        self.publisher.save(blocking=True)

        # an admin account which owns no journals
        self.admin = Account()
        self.admin.add_role("admin")
        self.admin.save(blocking=True)

        sources = JournalFixtureFactory.make_many_journal_sources(2, True)
        self.journal1 = Journal(**sources[0])
        self.journal1.set_owner(self.publisher.id)
        jbib1 = self.journal1.bibjson()
        jbib1.add_identifier(jbib1.P_ISSN, "1111-1111")
        jbib1.add_identifier(jbib1.E_ISSN, "2222-2222")
        self.journal1.save(blocking=True)

        self.publisher.add_journal(self.journal1)

        # journal2 has no owner set, so its article belongs to "someone else"
        self.journal2 = Journal(**sources[1])
        jbib2 = self.journal2.bibjson()
        jbib2.add_identifier(jbib2.P_ISSN, "1234-5678")
        jbib2.add_identifier(jbib2.E_ISSN, "9876-5432")
        self.journal2.save(blocking=True)

        # two articles in the publisher-owned journal1 ...
        self.article10 = Article(**ArticleFixtureFactory.make_article_source(
            pissn="1111-1111",
            eissn="2222-2222",
            doi="10.0000/article-10",
            fulltext="https://www.article10.com"))
        self.article10.set_id("articleid10")
        self.article10.save(blocking=True)

        self.article11 = Article(**ArticleFixtureFactory.make_article_source(
            pissn="1111-1111",
            eissn="2222-2222",
            doi="10.0000/article-11",
            fulltext="https://www.article11.com"))
        self.article11.set_id("articleid11")
        self.article11.save(blocking=True)

        # ... and one article in the unowned journal2
        self.article2 = Article(**ArticleFixtureFactory.make_article_source(
            pissn="1234-5678",
            eissn="9876-5432",
            doi="10.0000/article-2",
            fulltext="https://www.article2.com"))
        self.article2.set_id("articleid2")
        self.article2.save(blocking=True)

    def tearDown(self):
        super(TestCreateOrUpdateArticle, self).tearDown()

    def test_00_no_doi_and_url_changed(self):
        """Same DOI and URL as stored: the incoming article updates the stored one."""
        ba = self.article10.bibjson()
        ba.title = "Updated Article"

        # try for admin, who must name the article to update explicitly
        resp = ArticleService().create_article(account=self.admin,
                                               article=self.article10,
                                               update_article_id=self.article10.id)

        assert resp["success"] == 1, "expected 1 success, received: {}".format(resp)
        assert resp["update"] == 1, "expected 1 updated, received: {}".format(resp)
        assert resp["new"] == 0, "expected 0 new, received: {}".format(resp)
        assert self.article10.bibjson().title == "Updated Article", "Expected `Updated Article`, received: {}" \
            .format(self.article10.bibjson().title)

        ba.title = "Updated 2nd time"

        # try for publisher, matched to the stored article by DOI / fulltext URL
        resp = ArticleService().create_article(account=self.publisher,
                                               article=self.article10)

        assert resp["success"] == 1, "expected 1 success, received: {}".format(resp)
        assert resp["update"] == 1, "expected 1 updated, received: {}".format(resp)
        assert resp["new"] == 0, "expected 0 new, received: {}".format(resp)
        assert self.article10.bibjson().title == "Updated 2nd time", "Expected `Updated 2nd time`, received: {}" \
            .format(self.article10.bibjson().title)

    def test_01_new_doi_new_url(self):
        """Both identifiers are new: a publisher creates a new article, while an
        admin targeting an existing id updates that article."""
        ba = self.article10.bibjson()
        ba.remove_identifiers(ba.DOI)
        ba.remove_urls(ba.FULLTEXT)
        ba.add_identifier(ba.DOI, "10.0000/NEW")
        # NOTE: add_url takes (url, urltype) - the original call had the
        # arguments reversed, registering the literal string "fulltext" as a URL
        ba.add_url("https://www.UPDATED.com", ba.FULLTEXT)

        # for publisher - nothing matches, so a new article is created
        resp = ArticleService().create_article(account=self.publisher,
                                               article=self.article10)
        assert resp["success"] == 1, "expected 1 success, received: {}".format(resp)
        assert resp["update"] == 0, "expected 0 updated, received: {}".format(resp)
        assert resp["new"] == 1, "expected 1 new, received: {}".format(resp)

        # for admin - the explicit id means the existing article is updated
        resp = ArticleService().create_article(account=self.admin,
                                               article=self.article10,
                                               update_article_id=self.article10.id)

        assert resp["success"] == 1, "expected 1 success, received: {}".format(resp)
        assert resp["update"] == 1, "expected 1 updated, received: {}".format(resp)
        assert resp["new"] == 0, "expected 0 new, received: {}".format(resp)

    def test_02_old_doi_existing_url_admin(self):
        """Keeping the DOI but taking another article's fulltext URL is a merge conflict."""
        ba = self.article10.bibjson()
        ba.remove_urls(ba.FULLTEXT)
        # check for url from other article owned by the same publisher
        ba.add_url(self.article11.bibjson().get_single_url(ba.FULLTEXT),
                   ba.FULLTEXT)

        # try as a publisher
        with self.assertRaises(ArticleMergeConflict):
            ArticleService().create_article(account=self.publisher,
                                            article=self.article10)

        # try as an admin
        with self.assertRaises(ArticleMergeConflict):
            ArticleService().create_article(account=self.admin,
                                            article=self.article10,
                                            update_article_id=self.article10.id)

        # check for url from other article owned by someone else
        ba.remove_urls(ba.FULLTEXT)
        ba.add_url(self.article2.bibjson().get_single_url(ba.FULLTEXT),
                   ba.FULLTEXT)

        # try as a publisher
        with self.assertRaises(ArticleMergeConflict):
            ArticleService().create_article(account=self.publisher,
                                            article=self.article10)

        # try as an admin
        with self.assertRaises(ArticleMergeConflict):
            ArticleService().create_article(account=self.admin,
                                            article=self.article10,
                                            update_article_id=self.article10.id)

    def test_03_existing_doi_old_url_admin(self):
        """Keeping the URL but taking another article's DOI is a merge conflict."""
        ba = self.article10.bibjson()
        ba.remove_identifiers(ba.DOI)
        # check for DOI from other article owned by the same publisher
        ba.add_identifier(ba.DOI, "10.0000/article-11")

        # try as a publisher
        with self.assertRaises(ArticleMergeConflict):
            ArticleService().create_article(account=self.publisher,
                                            article=self.article10)

        # try as an admin
        with self.assertRaises(ArticleMergeConflict):
            ArticleService().create_article(account=self.admin,
                                            article=self.article10,
                                            update_article_id=self.article10.id)

        ba.remove_identifiers(ba.DOI)
        # check for DOI from other article owned by someone else
        ba.add_identifier(ba.DOI, "10.0000/article-2")

        # try as a publisher
        with self.assertRaises(ArticleMergeConflict):
            ArticleService().create_article(account=self.publisher,
                                            article=self.article10)

        # try as an admin
        with self.assertRaises(ArticleMergeConflict):
            ArticleService().create_article(account=self.admin,
                                            article=self.article10,
                                            update_article_id=self.article10.id)

    def test_04_old_doi_new_url(self):
        """Same DOI, new URL: ambiguous for a publisher (duplicate), but an admin
        with an explicit id can apply the update."""
        ba = self.article10.bibjson()
        ba.remove_urls(ba.FULLTEXT)
        ba.add_url("https://updated.com", ba.FULLTEXT)

        # try as publisher
        with self.assertRaises(DuplicateArticleException):
            ArticleService().create_article(account=self.publisher,
                                            article=self.article10)

        # try as an admin
        resp = ArticleService().create_article(account=self.admin,
                                               article=self.article10,
                                               update_article_id=self.article10.id)

        assert resp["success"] == 1, "expected 1 success, received: {}".format(resp)
        assert resp["update"] == 1, "expected 1 updated, received: {}".format(resp)
        assert resp["new"] == 0, "expected 0 new, received: {}".format(resp)
        assert self.article10.get_normalised_fulltext() == "//updated.com", \
            "expected //updated.com, received: {}".format(
                self.article10.get_normalised_fulltext())

    def test_05_new_doi_old_url(self):
        """New DOI, same URL: ambiguous for a publisher (duplicate), but an admin
        with an explicit id can apply the update."""
        ba = self.article10.bibjson()
        ba.remove_identifiers(ba.DOI)
        ba.add_identifier(ba.DOI, "10.0000/article-UPDATED")

        # try as publisher
        with self.assertRaises(DuplicateArticleException):
            ArticleService().create_article(account=self.publisher,
                                            article=self.article10)

        # try as an admin
        resp = ArticleService().create_article(account=self.admin,
                                               article=self.article10,
                                               update_article_id=self.article10.id)

        assert resp["success"] == 1, "expected 1 success, received: {}".format(resp)
        assert resp["update"] == 1, "expected 1 updated, received: {}".format(resp)
        assert resp["new"] == 0, "expected 0 new, received: {}".format(resp)
        # NOTE: the failure message previously reported the normalised fulltext
        # rather than the DOI under test
        assert self.article10.get_normalised_doi() == "10.0000/article-UPDATED", \
            "expected 10.0000/article-UPDATED, received: {}".format(
                self.article10.get_normalised_doi())

    def test_06_existing_doi_new_url(self):
        """Another article's DOI plus a new URL is a duplicate for everyone."""
        ba = self.article10.bibjson()
        ba.remove_urls(ba.FULLTEXT)
        ba.add_url("https://updated.com", ba.FULLTEXT)
        # check for doi from other article of the same publisher
        ba.remove_identifiers(ba.DOI)
        ba.add_identifier(ba.DOI,
                          self.article11.bibjson().get_one_identifier(ba.DOI))

        # try as publisher
        with self.assertRaises(DuplicateArticleException):
            ArticleService().create_article(account=self.publisher,
                                            article=self.article10)

        # try as an admin
        with self.assertRaises(DuplicateArticleException):
            ArticleService().create_article(account=self.admin,
                                            article=self.article10,
                                            update_article_id=self.article10.id)

    def test_07_new_doi_existing_url(self):
        """A new DOI plus another article's fulltext URL is a duplicate for everyone."""
        ba = self.article10.bibjson()
        ba.remove_urls(ba.FULLTEXT)
        ba.add_url(self.article11.bibjson().get_single_url(ba.FULLTEXT),
                   ba.FULLTEXT)
        # check for doi from other article of the same publisher
        ba.remove_identifiers(ba.DOI)
        ba.add_identifier(ba.DOI, "10.0000/article-UPDATED")

        # try as publisher
        with self.assertRaises(DuplicateArticleException):
            ArticleService().create_article(account=self.publisher,
                                            article=self.article10)

        # try as an admin
        with self.assertRaises(DuplicateArticleException):
            ArticleService().create_article(account=self.admin,
                                            article=self.article10,
                                            update_article_id=self.article10.id)
Esempio n. 19
0
 def all_articles(self):
     """Return every Article associated with any of this object's known ISSNs."""
     from portality.models import Article
     issns = self.known_issns()
     return Article.find_by_issns(issns)
Esempio n. 20
0
def migrate_articles(source, batch_size=5000):
    # read in the content
    f = open(source)
    xml = etree.parse(f)
    f.close()
    articles = xml.getroot()
    print "migrating", str(len(articles)), "article records from", source
    
    counter = 0
    omissions = 0
    batch = []
    for element in articles:
        a = Article()
        b = _to_article_bibjson(element)
        a.set_bibjson(b)
        hasjournal = _add_journal_info(a)
        
        if not hasjournal:
            print "INFO: omitting article"
            omissions += 1
            continue
        
        a.set_created(_created_date(element))
        a.set_id()
        a.prep() # prepare the thing to be saved, which is necessary since we're not actually going to save()
        batch.append(a.data)
        
        if len(batch) >= batch_size:
            counter += len(batch)
            print "Writing batch, size", len(batch)
            Article.bulk(batch, refresh=True)
            print "batch written, total so far", counter
            del batch[:]
    
    if len(batch) > 0:
        counter += len(batch)
        print "Writing final batch, size", len(batch)
        Article.bulk(batch, refresh=True)
        print "batch written, total written", counter
    
    print "wrote", counter, "articles, omitted", omissions
    def test_01_batch_create_article(self, name, kwargs):
        """Matrix-driven test of ArticleService.batch_create_articles.

        kwargs (all strings from the parameter matrix) select the shape of the
        article batch, the account, the service flags, any in-batch or in-index
        duplicates, and the expected outcome: either an exception (raises) or
        the success/fail/update counts of the returned report.
        """

        articles_arg = kwargs.get("articles")
        duplicate_in_batch_arg = kwargs.get("duplicate_in_batch")
        duplicate_in_index_arg = kwargs.get("duplicate_in_index")
        account_arg = kwargs.get("account")
        duplicate_check_arg = kwargs.get("duplicate_check")
        merge_duplicate_arg = kwargs.get("merge_duplicate")
        limit_to_account_arg = kwargs.get("limit_to_account")
        add_journal_info_arg = kwargs.get("add_journal_info")

        raises_arg = kwargs.get("raises")
        success_arg = kwargs.get("success")
        fail_arg = kwargs.get("fail")
        update_arg = kwargs.get("update")

        ###############################################
        ## set up

        success = int(success_arg)
        fail = int(fail_arg)
        update = int(update_arg)

        duplicate_in_batch = duplicate_in_batch_arg == "yes"
        duplicate_in_index = int(duplicate_in_index_arg)

        raises = EXCEPTIONS.get(raises_arg)

        # tri-state flags: None means "use the service default"
        duplicate_check = None
        if duplicate_check_arg != "none":
            duplicate_check = duplicate_check_arg == "true"

        merge_duplicate = None
        if merge_duplicate_arg != "none":
            merge_duplicate = merge_duplicate_arg == "true"

        limit_to_account = None
        if limit_to_account_arg != "none":
            limit_to_account = limit_to_account_arg == "true"

        add_journal_info = None
        if add_journal_info_arg != "none":
            add_journal_info = add_journal_info_arg == "true"

        account = None
        if account_arg != "none":
            source = AccountFixtureFactory.make_publisher_source()
            account = Account(**source)

        journal_specs = []
        last_doi = None
        last_ft = None
        last_issn = None
        last_id = None
        articles = None
        if articles_arg != "none":
            articles = []
            if articles_arg == "yes":
                # one with a DOI and no fulltext
                source = ArticleFixtureFactory.make_article_source(
                    eissn="0000-0000",
                    pissn="0000-0000",
                    doi="10.123/abc/0",
                    fulltext=False
                )
                del source["bibjson"]["journal"]
                article = Article(**source)
                article.set_id()
                articles.append(article)
                if add_journal_info:
                    journal_specs.append({"title" : "0", "pissn" : "0000-0000", "eissn" : "0000-0000"})

                # another with a DOI and no fulltext
                source = ArticleFixtureFactory.make_article_source(
                    eissn="1111-1111",
                    pissn="1111-1111",
                    doi="10.123/abc/1",
                    fulltext=False
                )
                del source["bibjson"]["journal"]
                article = Article(**source)
                article.set_id()
                articles.append(article)
                if add_journal_info:
                    journal_specs.append({"title" : "1", "pissn" : "1111-1111", "eissn" : "1111-1111"})

                # one with a fulltext and no DOI
                source = ArticleFixtureFactory.make_article_source(
                    eissn="2222-2222",
                    pissn="2222-2222",
                    fulltext="http://example.com/2",
                    doi=False
                )
                del source["bibjson"]["journal"]
                article = Article(**source)
                article.set_id()
                articles.append(article)
                if add_journal_info:
                    journal_specs.append({"title" : "2", "pissn" : "2222-2222", "eissn" : "2222-2222"})

                # another one with a fulltext and no DOI
                source = ArticleFixtureFactory.make_article_source(
                    eissn="3333-3333",
                    pissn="3333-3333",
                    fulltext="http://example.com/3",
                    doi=False
                )
                del source["bibjson"]["journal"]
                article = Article(**source)
                article.set_id()
                articles.append(article)
                if add_journal_info:
                    journal_specs.append({"title" : "3", "pissn" : "3333-3333", "eissn" : "3333-3333"})

                # remember identifiers of the last plain article, for the mocks below
                last_issn = "3333-3333"
                last_doi = "10.123/abc/1"
                last_ft = "http://example.com/3"
                last_id = articles[-1].id

                if duplicate_in_batch:
                    # one with a duplicated DOI
                    source = ArticleFixtureFactory.make_article_source(
                        eissn="4444-4444",
                        pissn="4444-4444",
                        doi="10.123/abc/0",
                        fulltext="http://example.com/4"
                    )
                    del source["bibjson"]["journal"]
                    article = Article(**source)
                    article.set_id()
                    articles.append(article)
                    if add_journal_info:
                        journal_specs.append({"title" : "4", "pissn" : "4444-4444", "eissn" : "4444-4444"})

                    # one with a duplicated Fulltext
                    source = ArticleFixtureFactory.make_article_source(
                        eissn="5555-5555",
                        pissn="5555-5555",
                        doi="10.123/abc/5",
                        fulltext="http://example.com/1"
                    )
                    del source["bibjson"]["journal"]
                    article = Article(**source)
                    article.set_id()
                    articles.append(article)
                    if add_journal_info:
                        journal_specs.append({"title" : "5", "pissn" : "5555-5555", "eissn" : "5555-5555"})

        # mock out ownership checks on the service according to the account scenario
        ilo_mock = None
        if account_arg == "owner":
            ilo_mock = BLLArticleMockFactory.is_legitimate_owner(legit=True)
        elif account_arg == "own_1":
            ilo_mock = BLLArticleMockFactory.is_legitimate_owner(legit_on_issn=[last_issn])
        else:
            ilo_mock = BLLArticleMockFactory.is_legitimate_owner()
        self.svc.is_legitimate_owner = ilo_mock

        # mock out duplicate detection: 1 = a mergeable duplicate, 2 = a merge conflict
        gd_mock = None
        if duplicate_in_index == 1:
            gd_mock = BLLArticleMockFactory.get_duplicate(given_article_id=last_id, eissn=last_issn, pissn=last_issn, doi=last_doi, fulltext=last_ft)
        elif duplicate_in_index == 2:
            gd_mock = BLLArticleMockFactory.get_duplicate(merge_conflict=True)
        else:
            gd_mock = BLLArticleMockFactory.get_duplicate(return_none=True)
        self.svc.get_duplicate = gd_mock

        ios_mock = BLLArticleMockFactory.issn_ownership_status([], [], [], [])
        self.svc.issn_ownership_status = ios_mock

        if add_journal_info:
            gj_mock = ModelArticleMockFactory.get_journal(journal_specs)
            Article.get_journal = gj_mock

        ###########################################################
        # Execution

        if raises is not None:
            with self.assertRaises(raises):
                try:
                    self.svc.batch_create_articles(articles, account, duplicate_check, merge_duplicate,
                                                   limit_to_account, add_journal_info)
                except exceptions.IngestException as e:
                    # a merge conflict aborts before a report is compiled, so only
                    # inspect the report in the other failure scenarios
                    if duplicate_in_index != 2:
                        report = e.result
                        assert report["success"] == success
                        assert report["fail"] == fail
                        assert report["update"] == update
                        assert report["new"] == success - update
                    raise
        else:
            report = self.svc.batch_create_articles(articles, account, duplicate_check, merge_duplicate,
                                                    limit_to_account, add_journal_info)

            # make sure all the articles are saved before running the asserts
            aids = [(a.id, a.last_updated) for a in articles]
            for aid, lu in aids:
                Article.block(aid, lu, sleep=0.05)

            assert report["success"] == success
            assert report["fail"] == fail
            assert report["update"] == update
            assert report["new"] == success - update

            if success > 0:
                all_articles = Article.all()
                if len(all_articles) != success:
                    # the index may not have refreshed yet; retry once
                    time.sleep(0.5)
                    all_articles = Article.all()
                assert len(all_articles) == success
                for article in all_articles:
                    if add_journal_info:
                        assert article.bibjson().journal_title is not None
                    else:
                        assert article.bibjson().journal_title is None

            else:
                # there's nothing in the article index
                with self.assertRaises(ESMappingMissingError):
                    Article.all()
Esempio n. 22
0
class TestAdminEditMetadata(DoajTestCase):
    """Tests for the admin article metadata form: access control, and updates
    which do / do not collide with another article's fulltext URL or DOI.
    """

    def setUp(self):
        super(TestAdminEditMetadata, self).setUp()
        admin_account = Account.make_account(username="******",
                                             name="Admin",
                                             email="*****@*****.**",
                                             roles=["admin"])
        admin_account.set_password('password123')
        admin_account.save()

        publisher_account = Account.make_account(username="******",
                                                 name="Publisher",
                                                 email="*****@*****.**",
                                                 roles=["publisher"])
        publisher_account.set_password('password456')
        publisher_account.save(blocking=True)

        # a journal and an article in DOAJ for the form to operate on
        self.j = Journal(**JournalFixtureFactory.make_journal_source(
            in_doaj=True))
        self.j.save(blocking=True)
        self.a = Article(**ArticleFixtureFactory.make_article_source(
            in_doaj=True))
        self.a.save(blocking=True)

    def tearDown(self):
        super(TestAdminEditMetadata, self).tearDown()
        del self.a
        del self.j

    def admin_post_article_metadata_form(self, formdata):
        """ Post a form to the article metadata endpoint as the admin user """
        with self.app_test.test_client() as t_client:
            self.login(t_client, "admin", "password123")
            resp = t_client.post(url_for('admin.article_page',
                                         article_id=self.a.id),
                                 data=dict(formdata))
            assert resp.status_code == 200, "expected: 200, received: {}".format(
                resp.status)

    @staticmethod
    def login(app, username, password):
        """ Log the given test client in via the account login endpoint """
        return app.post('/account/login',
                        data=dict(username=username, password=password),
                        follow_redirects=True)

    @staticmethod
    def logout(app):
        """ Log the given test client out """
        return app.get('/account/logout', follow_redirects=True)

    def test_01_open_article_page(self):
        """ Ensure only Admin can open the article metadata form """

        with self.app_test.test_client() as t_client:
            self.login(t_client, "admin", "password123")
            resp = t_client.get(url_for('admin.article_page',
                                        article_id=self.a.id),
                                follow_redirects=False)
            assert resp.status_code == 200, "expected: 200, received: {}".format(
                resp.status)

        # user not logged in
        with self._make_and_push_test_context():
            with self.app_test.test_client() as t_client:
                resp = t_client.get(url_for('admin.article_page',
                                            article_id=self.a.id),
                                    follow_redirects=False)
                assert resp.status_code == 302, "expected: 302, received: {}".format(
                    resp.status)  #expect redirection to login page

        # login as publisher
        with self.app_test.test_client() as t_client:
            self.login(t_client, "publisher", "password456")
            resp = t_client.get(url_for('admin.article_page',
                                        article_id=self.a.id),
                                follow_redirects=False)
            assert resp.status_code == 302, "expected: 302, received: {}".format(
                resp.status)  # expect redirection to login page

    def test_02_update_article_metadata_no_url_fulltext(self):
        """ Update an article with no change to identifying fields: URL and DOI """

        source = ArticleMetadataFactory(
            article_source=self.a).update_article_no_change_to_url_and_doi()

        # Submit the form
        self.admin_post_article_metadata_form(source)

        # Retrieve the result
        a = Article.pull(self.a.id)
        b = a.bibjson()
        assert b.title == source[
            'title'], 'expected updated title, received: {}'.format(b.title)

    def test_03_update_fulltext_valid(self):
        """ Update an article's fulltext URL """
        source = ArticleMetadataFactory(
            article_source=self.a).update_article_fulltext(valid=True)

        # Submit the form
        self.admin_post_article_metadata_form(source)

        a = Article.pull(self.a.id)
        bj = a.bibjson()
        # expect updated fulltext url
        assert bj.get_single_url(
            "fulltext"
        ) == 'https://www.newarticleurl.co.uk/fulltext', 'expected updated url, received: {}'.format(
            bj.get_single_url("fulltext"))

    def test_04_update_fulltext_invalid(self):
        """ The form should ignore an update that has the same fulltext URL as an existing article """
        source = ArticleMetadataFactory(
            article_source=self.a).update_article_fulltext(valid=False)

        # create a second article holding the URL the update will clash with
        a1source = ArticleFixtureFactory.make_article_source(in_doaj=True)
        a1source["id"] = 'aaaaaaaaa_article'
        a1source["fulltext"] = "https://www.urltorepeat.com"
        a1 = Article(**a1source)
        a1.save(blocking=True)

        # Submit the form
        self.admin_post_article_metadata_form(source)

        # Retrieve the result - it should be unchanged
        a = Article.pull(self.a.id)
        bj = a.bibjson()
        assert bj.title == "Article Title", 'expect old title, received: {}'.format(
            bj.title)
        assert bj.get_single_url(
            "fulltext"
        ) == 'http://www.example.com/article', 'expected old url, received: {}'.format(
            bj.get_single_url("fulltext"))

    def test_05_update_doi_valid(self):
        """ The form should allow an update with a new valid DOI """
        source = ArticleMetadataFactory(
            article_source=self.a).update_article_doi(valid=True)

        # Submit the form
        self.admin_post_article_metadata_form(source)

        # Retrieve the result
        a = Article.pull(self.a.id)
        bj = a.bibjson()
        # expect new data
        assert bj.title == "New title", 'expect updated title, received: {}'.format(
            bj.title)
        # NOTE: the failure message previously called a non-existent
        # get_single_identifier(), which would have raised AttributeError and
        # masked the real assertion failure
        assert bj.get_one_identifier(
            "doi"
        ) == '10.1111/article-0', 'expected new doi, received: {}'.format(
            bj.get_one_identifier("doi"))

    def test_06_update_doi_invalid(self):
        """ The form should ignore an update that has the same DOI as an existing article """
        source = ArticleMetadataFactory(
            article_source=self.a).update_article_doi(valid=False)

        # create a second article holding the DOI the update will clash with
        a1source = ArticleFixtureFactory.make_article_source(in_doaj=True)
        a1source['id'] = 'aaaaaaaaa_article'
        a1source["fulltext"] = "https://www.someurl.com"
        a1source["doi"] = '10.1234/article'
        a1 = Article(**a1source)
        a1.save(blocking=True)

        # Submit the form
        self.admin_post_article_metadata_form(source)

        a = Article.pull(self.a.id)
        bj = a.bibjson()
        # expect old data
        assert bj.title == "Article Title", 'expect old title, received: {}'.format(
            bj.title)
        assert bj.get_one_identifier(
            "doi"
        ) == '10.0000/SOME.IDENTIFIER', 'expected old doi, received: {}'.format(
            bj.get_one_identifier("doi"))
Esempio n. 23
0
    def test_01_create_article(self, value, kwargs):
        """Matrix test for ``self.svc.create_article`` (admin / update-article path).

        ``kwargs`` carries string flags describing the scenario to build
        (account role, duplicate state in the index, whether DOI/fulltext
        changed on update, dry-run, etc.) and the expected outcome
        (exception raised, success count, whether the original and/or
        merged article ends up saved in the index).
        """

        # -- unpack scenario flags -------------------------------------
        article_arg = kwargs.get("article")
        account_arg = kwargs.get("account")
        get_duplicate_result_arg = kwargs.get("get_duplicate_result")
        role_arg = kwargs.get("role")
        merge_duplicate_arg = kwargs.get("merge_duplicate")
        add_journal_info_arg = kwargs.get("add_journal_info")
        dry_run_arg = kwargs.get("dry_run")
        update_article_id_arg = kwargs.get("update_article_id")
        # NOTE(review): the lookup key carries an "_arg" suffix unlike all the
        # other keys — confirm the parameter matrix really uses this key name.
        has_ft_doi_changed_arg = kwargs.get("has_ft_doi_changed_arg")

        # -- expected outcomes -----------------------------------------
        raises_arg = kwargs.get("raises")
        success_arg = kwargs.get("success")
        original_saved_arg = kwargs.get("original_saved")
        merge_saved_arg = kwargs.get("merge_saved")

        ###############################################
        ## set up

        success = int(success_arg)

        has_ft_doi_changed = True if has_ft_doi_changed_arg == "yes" else False

        # tri-state flags: "none" -> None, otherwise boolean from "true"/"false"
        merge_duplicate = None
        if merge_duplicate_arg != "none":
            merge_duplicate = True if merge_duplicate_arg == "true" else False

        add_journal_info = None
        if add_journal_info_arg != "none":
            add_journal_info = True if add_journal_info_arg == "true" else False

        dry_run = None
        if dry_run_arg != "none":
            dry_run = True if dry_run_arg == "true" else False

        raises = EXCEPTIONS.get(raises_arg)

        # canonical identifiers used by the uploaded article / its journal
        eissn = "1234-5678"
        pissn = "9876-5432"
        doi = "10.123/abc/1"
        fulltext = "http://example.com/1"

        another_doi = "10.123/duplicate-1"

        another_eissn = "1111-1111"
        another_pissn = "2222-2222"

        duplicate_id = None
        original_id = None
        update_article_id = None

        # journal the service can pull title info from when add_journal_info is on
        if add_journal_info:
            jsource = JournalFixtureFactory.make_journal_source(in_doaj=True)
            j = Journal(**jsource)
            bj = j.bibjson()
            bj.title = "Add Journal Info Title"
            bj.remove_identifiers()
            bj.add_identifier(bj.P_ISSN, pissn)
            bj.add_identifier(bj.E_ISSN, eissn)
            j.save(blocking=True)

        # a *different* article sharing the DOI, pre-saved in the index
        if get_duplicate_result_arg == 'different':
            source = ArticleFixtureFactory.make_article_source(
                eissn=another_eissn,
                pissn=another_pissn,
                doi=doi,
                fulltext=fulltext)
            del source["bibjson"]["journal"]
            duplicate = Article(**source)
            duplicate.save()
            duplicate_id = duplicate.id

        # the incoming article under test (not saved yet)
        article_id_to_upload = None
        if article_arg == "exists":
            source = ArticleFixtureFactory.make_article_source(
                eissn=eissn, pissn=pissn, doi=doi, fulltext=fulltext)
            del source["bibjson"]["journal"]
            article = Article(**source)
            article.set_id()
            article_id_to_upload = article.id

        # a duplicate that is the incoming article itself (same id), pre-saved
        if get_duplicate_result_arg == "itself":
            source = ArticleFixtureFactory.make_article_source(
                eissn=another_eissn,
                pissn=another_pissn,
                doi=doi,
                fulltext=fulltext)
            del source["bibjson"]["journal"]
            duplicate = Article(**source)
            duplicate.set_id(article_id_to_upload)
            duplicate.save()
            duplicate_id = duplicate.id

        # the pre-existing article that the incoming one is meant to update
        if update_article_id_arg != "none":

            another_source = ArticleFixtureFactory.make_article_source(
                eissn=eissn, pissn=pissn, doi=doi, fulltext=fulltext)
            original = Article(**another_source)
            original.save(blocking=True)
            original_id = original.id

            # NOTE(review): `article` is only bound when article_arg == "exists";
            # these branches assume the matrix pairs update scenarios with an
            # existing incoming article — confirm against the parameter file.
            if update_article_id_arg == "doi_ft_not_changed":
                article.bibjson().title = "This needs to be updated"

            elif update_article_id_arg == "doi_ft_changed_duplicate":

                article.bibjson().remove_identifiers("doi")
                article.bibjson().add_identifier("doi", another_doi)

            elif update_article_id_arg == "doi_ft_changed_ok":

                article.bibjson().remove_identifiers("doi")
                article.bibjson().add_identifier("doi", "10.1234/updated")

        else:
            update_article_id = None

        account = None
        if account_arg != "none":
            source = AccountFixtureFactory.make_publisher_source()
            account = Account(**source)

        # mock the ownership checks on the service under test
        legit = True if account_arg == "owner" else False
        ilo_mock = BLLArticleMockFactory.is_legitimate_owner(legit=legit)
        self.svc.is_legitimate_owner = ilo_mock

        owned = [eissn, pissn] if account_arg == "owner" else []
        shared = []
        unowned = [eissn] if account_arg == "not_owner" else []
        unmatched = [pissn] if account_arg == "not_owner" else []
        ios_mock = BLLArticleMockFactory.issn_ownership_status(
            owned, shared, unowned, unmatched)
        self.svc.issn_ownership_status = ios_mock

        if role_arg == "admin":
            account.set_role("admin")

        account.save()

        # mock get_duplicate according to the scenario; the fall-through case
        # produces a mock that raises (given_article_id="exception")
        if get_duplicate_result_arg == "none":
            gd_mock = BLLArticleMockFactory.get_duplicate(return_none=True)

        elif get_duplicate_result_arg == "itself":
            gd_mock = BLLArticleMockFactory.get_duplicate(
                eissn=eissn,
                pissn=pissn,
                doi=doi,
                fulltext=fulltext,
                given_article_id=original_id)
        elif get_duplicate_result_arg == "different":
            gd_mock = BLLArticleMockFactory.get_duplicate(
                eissn=another_eissn,
                pissn=another_pissn,
                doi=doi,
                fulltext=fulltext,
                given_article_id=duplicate_id)
        else:
            gd_mock = BLLArticleMockFactory.get_duplicate(
                given_article_id="exception")
        self.svc.get_duplicate = gd_mock
        # NOTE(review): fails with NameError if article_arg != "exists" —
        # presumably the matrix never combines those; verify.
        mock_article = self.svc.get_duplicate(article)

        if role_arg == "admin" or (role_arg == "publisher"
                                   and account_arg == "owner"):
            has_permissions_mock = BLLArticleMockFactory.has_permissions(True)
        else:
            has_permissions_mock = BLLArticleMockFactory.has_permissions(False)
        self.svc.has_permissions = has_permissions_mock

        prepare_update_admin_mock = BLLArticleMockFactory._prepare_update_admin(
            get_duplicate_result_arg, update_article_id_arg)
        self.svc._prepare_update_admin = prepare_update_admin_mock

        prepare_update_publisher_mock = BLLArticleMockFactory._prepare_update_publisher(
            get_duplicate_result_arg, has_ft_doi_changed)
        self.svc._prepare_update_publisher = prepare_update_publisher_mock

        ###########################################################
        # Execution

        # NOTE(review): original_id (not the local update_article_id) is passed
        # as update_article_id in both calls — confirm this is intended.
        if raises is not None:
            with self.assertRaises(raises):
                self.svc.create_article(article,
                                        account,
                                        merge_duplicate=merge_duplicate,
                                        add_journal_info=add_journal_info,
                                        dry_run=dry_run,
                                        update_article_id=original_id)
        else:
            report = self.svc.create_article(article,
                                             account,
                                             merge_duplicate=merge_duplicate,
                                             add_journal_info=add_journal_info,
                                             dry_run=dry_run,
                                             update_article_id=original_id)

            assert report["success"] == success

            # check that the article was saved and if it was saved that it was suitably merged
            if original_saved_arg == "yes" and update_article_id is not None:
                if get_duplicate_result_arg == "itself":
                    original = Article.pull(update_article_id)
                    assert original is not None
                    assert report["update"] == 1, "update: {}".format(
                        report["update"])
                    assert report["new"] == 0, "update: {}".format(
                        report["new"])
            elif original_saved_arg == "yes":
                if get_duplicate_result_arg == "itself":
                    new = Article.pull(article_id_to_upload)
                    assert new is not None
                    assert report["update"] == 1, "update: {}".format(
                        report["update"])
                    assert report["new"] == 0, "update: {}".format(
                        report["new"])
                elif get_duplicate_result_arg == "none":
                    new = Article.pull(article_id_to_upload)
                    assert new is not None
                    assert report["update"] == 0, "update: {}".format(
                        report["update"])
                    assert report["new"] == 1, "update: {}".format(
                        report["new"])

            if merge_saved_arg == "yes":
                merged = Article.pull(mock_article.id)
                assert merged is not None
                assert report["update"] == 1
            elif mock_article is not None and mock_article.id != original_id:
                # the mocked duplicate must NOT have been written to the index
                merged = Article.pull(mock_article.id)
                assert merged is None, "merged: {}".format(merged)

            if add_journal_info:
                assert article.bibjson(
                ).journal_title == "Add Journal Info Title"

            if update_article_id_arg == "doi_ft_changed_ok":
                original = Article.pull(original_id)
                assert original is not None
            elif update_article_id_arg == "doi_ft_not_changed":
                original = Article.pull(original_id)
                assert original is not None
    def test_01_discover_duplicates(self, name, kwargs):
        """Matrix test for ``articleService.discover_duplicates``.

        Seeds the index with articles whose DOI / fulltext URL are variants
        (padded, http/https, prefixed) of known identifiers, then builds an
        incoming article and checks which seeded articles are reported as
        possible duplicates by DOI and by fulltext URL.
        """

        # -- unpack scenario flags -------------------------------------
        article_arg = kwargs.get("article")
        owner_arg = kwargs.get("owner")
        article_doi_arg = kwargs.get("article_doi")
        doi_duplicate_arg = kwargs.get("doi_duplicate")
        article_fulltext_arg = kwargs.get("article_fulltext")
        fulltext_duplicate_arg = kwargs.get("fulltext_duplicate")
        articles_by_doi_arg = kwargs.get("articles_by_doi")
        articles_by_fulltext_arg = kwargs.get("articles_by_fulltext")
        raises_arg = kwargs.get("raises")

        raises = EXCEPTIONS.get(raises_arg)

        ###############################################
        ## set up

        owner = None
        if owner_arg != "none":
            owner = Account(**AccountFixtureFactory.make_publisher_source())

        owner_id = None
        if owner is not None:
            owner_id = owner.id

        # create a journal for the owner
        if owner_arg not in ["none"]:
            source = JournalFixtureFactory.make_journal_source(in_doaj=True)
            journal = Journal(**source)
            journal.set_owner(owner.id)
            journal.bibjson().remove_identifiers()
            journal.bibjson().add_identifier("eissn", "1234-5678")
            journal.bibjson().add_identifier("pissn", "9876-5432")
            journal.save(blocking=True)

        # determine what we need to load into the index
        # one seeded article per entry in IDENTS, with the DOI/fulltext
        # variant dictated by the *_duplicate args
        article_ids = []
        aids_block = []
        if owner_arg not in ["none", "no_articles"]:
            for i, ident in enumerate(IDENTS):
                the_doi = ident["doi"]
                if doi_duplicate_arg == "padded":
                    the_doi = "  " + the_doi + "  "
                elif doi_duplicate_arg == "prefixed":
                    the_doi = "https://dx.doi.org/" + the_doi

                the_fulltext = ident["fulltext"]
                if article_fulltext_arg != "invalid":
                    if fulltext_duplicate_arg == "padded":
                        the_fulltext = "  http:" + the_fulltext
                    elif fulltext_duplicate_arg == "http":
                        the_fulltext = "http:" + the_fulltext
                    elif fulltext_duplicate_arg == "https":
                        the_fulltext = "https:" + the_fulltext
                    else:
                        the_fulltext = "http:" + the_fulltext

                source = ArticleFixtureFactory.make_article_source(eissn="1234-5678", pissn="9876-5432", doi=the_doi, fulltext=the_fulltext)
                article = Article(**source)
                article.set_id()
                article.save()
                article_ids.append(article.id)
                aids_block.append((article.id, article.last_updated))

        # generate our incoming article
        article = None
        doi = None
        fulltext = None
        if article_arg == "yes":
            # NOTE(review): "1234=5678" has "=" where the journal has "-" — the
            # adjacent comment says "one matching", so this looks like a typo,
            # though the comment also says ISSN matches are irrelevant here.
            eissn = "1234=5678" # one matching
            pissn = "6789-1234" # the other not - issn matches are not relevant to this test

            if article_doi_arg in ["yes", "padded"]:
                doi = "10.1234/abc/11"
                if doi_duplicate_arg in ["yes", "padded"]:
                    doi = IDENTS[0]["doi"]
                if article_doi_arg == "padded":
                    doi = "  doi:" + doi + "  "
            elif article_doi_arg in ["invalid"]:
                doi = IDENTS[-1]["doi"]

            if article_fulltext_arg in ["yes", "padded", "https"]:
                fulltext = "//example.com/11"
                if fulltext_duplicate_arg in ["yes", "padded", "https"]:
                    fulltext = IDENTS[0]["fulltext"]
                if fulltext_duplicate_arg == "padded":
                    fulltext = "  http:" + fulltext + "  "
                elif fulltext_duplicate_arg == "https":
                    fulltext = "https:" + fulltext
                else:
                    fulltext = "http:" + fulltext
            elif article_fulltext_arg == "invalid":
                fulltext = IDENTS[-1]["fulltext"]

            source = ArticleFixtureFactory.make_article_source(eissn=eissn, pissn=pissn, doi=doi, fulltext=fulltext)
            article = Article(**source)

            # we need to do this if doi or fulltext are none, because the factory will set a default if we don't
            # provide them
            if doi is None:
                article.bibjson().remove_identifiers("doi")
            if fulltext is None:
                article.bibjson().remove_urls("fulltext")

            article.set_id()

        # wait until all the seeded articles are queryable in the index
        Article.blockall(aids_block)

        ###########################################################
        # Execution

        svc = DOAJ.articleService()
        if raises is not None:
            with self.assertRaises(raises):
                svc.discover_duplicates(article, owner_id)
        else:
            possible_articles = svc.discover_duplicates(article, owner_id)

            if articles_by_doi_arg == "yes":
                assert "doi" in possible_articles
                assert len(possible_articles["doi"]) == 1
                # if this is the "invalid" doi, then we expect it to match the final article, otherwise match the first
                if article_doi_arg == "invalid":
                    assert possible_articles["doi"][0].id == article_ids[-1]
                else:
                    assert possible_articles["doi"][0].id == article_ids[0]
            else:
                if possible_articles is not None:
                    assert "doi" not in possible_articles

            if articles_by_fulltext_arg == "yes":
                assert "fulltext" in possible_articles
                assert len(possible_articles["fulltext"]) == 1
                # if this is the "invalid" fulltext url, then we expect it to match the final article, otherwise match the first
                if article_fulltext_arg == "invalid":
                    assert possible_articles["fulltext"][0].id == article_ids[-1]
                else:
                    assert possible_articles["fulltext"][0].id == article_ids[0]
            else:
                if possible_articles is not None:
                    assert "fulltext" not in possible_articles
# --- Esempio n. 25 ---
    def test_01_create_article(self, name, kwargs):
        """Parameterised check of ``self.svc.create_article``.

        The ``kwargs`` dict describes the scenario (whether the incoming
        article exists, the account's ownership, whether a duplicate is in
        the index, and the tri-state service flags) plus the expected
        outcome: an exception, the success count, and whether the original
        and/or merged article ends up saved.
        """

        # -- scenario flags --------------------------------------------
        article_arg = kwargs.get("article")
        article_duplicate_arg = kwargs.get("article_duplicate")
        account_arg = kwargs.get("account")
        duplicate_check_arg = kwargs.get("duplicate_check")
        merge_duplicate_arg = kwargs.get("merge_duplicate")
        limit_to_account_arg = kwargs.get("limit_to_account")
        dry_run_arg = kwargs.get("dry_run")

        raises_arg = kwargs.get("raises")
        success_arg = kwargs.get("success")
        original_saved_arg = kwargs.get("original_saved")
        merge_saved_arg = kwargs.get("merge_saved")

        ###############################################
        ## set up

        success = int(success_arg)

        def tri_state(flag):
            # "none" -> None; otherwise a boolean derived from "true"/"false"
            return None if flag == "none" else flag == "true"

        duplicate_check = tri_state(duplicate_check_arg)
        merge_duplicate = tri_state(merge_duplicate_arg)
        limit_to_account = tri_state(limit_to_account_arg)
        dry_run = tri_state(dry_run_arg)

        raises = EXCEPTIONS.get(raises_arg)

        # the incoming article (built but not saved)
        article = None
        original_id = None
        if article_arg == "exists":
            fixture = ArticleFixtureFactory.make_article_source(eissn="1234-5678", pissn="9876-5432", doi="10.123/abc/1", fulltext="http://example.com/1")
            article = Article(**fixture)
            article.set_id()
            original_id = article.id

        account = None
        if account_arg != "none":
            account = Account(**AccountFixtureFactory.make_publisher_source())

        # mock the ownership checks on the service under test
        is_owner = account_arg == "owner"
        self.svc.is_legitimate_owner = BLLArticleMockFactory.is_legitimate_owner(legit=is_owner)

        not_owner = account_arg == "not_owner"
        owned = ["1234-5678", "9876-5432"] if is_owner else []
        unowned = ["1234-5678"] if not_owner else []
        unmatched = ["9876-5432"] if not_owner else []
        self.svc.issn_ownership_status = BLLArticleMockFactory.issn_ownership_status(
            owned, [], unowned, unmatched)

        # mock duplicate detection
        if article_duplicate_arg == "yes":
            dup_mock = BLLArticleMockFactory.get_duplicate(eissn="1234-5678", pissn="9876-5432", doi="10.123/abc/1", fulltext="http://example.com/1")
        else:
            dup_mock = BLLArticleMockFactory.get_duplicate(return_none=True)
        self.svc.get_duplicate = dup_mock

        mock_article = self.svc.get_duplicate(article)

        ###########################################################
        # Execution

        if raises is not None:
            with self.assertRaises(raises):
                self.svc.create_article(article, account, duplicate_check, merge_duplicate, limit_to_account, dry_run)
            return

        report = self.svc.create_article(article, account, duplicate_check, merge_duplicate, limit_to_account, dry_run)

        assert report["success"] == success

        # check that the article was saved and if it was saved that it was suitably merged
        if original_saved_arg == "yes":
            original = Article.pull(original_id)
            assert original is not None
            assert report["update"] == 0
        elif article is not None:
            original = Article.pull(original_id)
            assert original is None

        if merge_saved_arg == "yes":
            merged = Article.pull(mock_article.id)
            assert merged is not None
            assert report["update"] == 1
        elif mock_article is not None:
            merged = Article.pull(mock_article.id)
            assert merged is None
    def test_01_batch_create_article(self, name, kwargs):
        """Matrix test for ``self.svc.batch_create_articles``.

        Builds a batch of articles (optionally containing intra-batch
        duplicates), mocks ownership / duplicate-detection / journal-lookup
        on the service, runs the batch create and checks the success / fail /
        update / new counts in the returned report, plus the state of the
        article index afterwards.
        """

        # -- unpack scenario flags -------------------------------------
        articles_arg = kwargs.get("articles")
        duplicate_in_batch_arg = kwargs.get("duplicate_in_batch")
        duplicate_in_index_arg = kwargs.get("duplicate_in_index")
        account_arg = kwargs.get("account")
        duplicate_check_arg = kwargs.get("duplicate_check")
        merge_duplicate_arg = kwargs.get("merge_duplicate")
        limit_to_account_arg = kwargs.get("limit_to_account")
        add_journal_info_arg = kwargs.get("add_journal_info")

        # -- expected outcomes -----------------------------------------
        raises_arg = kwargs.get("raises")
        success_arg = kwargs.get("success")
        fail_arg = kwargs.get("fail")
        update_arg = kwargs.get("update")

        ###############################################
        ## set up

        success = int(success_arg)
        fail = int(fail_arg)
        update = int(update_arg)

        duplicate_in_batch = duplicate_in_batch_arg == "yes"
        # 0 = none, 1 = one known duplicate in index, 2 = merge-conflict case
        duplicate_in_index = int(duplicate_in_index_arg)

        raises = EXCEPTIONS.get(raises_arg)

        # tri-state flags: "none" -> None, otherwise boolean from "true"/"false"
        duplicate_check = None
        if duplicate_check_arg != "none":
            duplicate_check = True if duplicate_check_arg == "true" else False

        merge_duplicate = None
        if merge_duplicate_arg != "none":
            merge_duplicate = True if merge_duplicate_arg == "true" else False

        limit_to_account = None
        if limit_to_account_arg != "none":
            limit_to_account = True if limit_to_account_arg == "true" else False

        add_journal_info = None
        if add_journal_info_arg != "none":
            add_journal_info = True if add_journal_info_arg == "true" else False

        account = None
        if account_arg != "none":
            source = AccountFixtureFactory.make_publisher_source()
            account = Account(**source)

        # journal_specs feeds the get_journal mock when add_journal_info is on;
        # the "journal" section is stripped from each fixture so that the
        # service has to fill it in itself
        journal_specs = []
        last_doi = None
        last_ft = None
        last_issn = None
        last_id = None
        articles = None
        if articles_arg != "none":
            articles = []
            if articles_arg == "yes":
                # one with a DOI and no fulltext
                source = ArticleFixtureFactory.make_article_source(
                    eissn="0000-0000",
                    pissn="0000-0000",
                    doi="10.123/abc/0",
                    fulltext=False)
                del source["bibjson"]["journal"]
                article = Article(**source)
                article.set_id()
                articles.append(article)
                if add_journal_info:
                    journal_specs.append({
                        "title": "0",
                        "pissn": "0000-0000",
                        "eissn": "0000-0000"
                    })

                # another with a DOI and no fulltext
                source = ArticleFixtureFactory.make_article_source(
                    eissn="1111-1111",
                    pissn="1111-1111",
                    doi="10.123/abc/1",
                    fulltext=False)
                del source["bibjson"]["journal"]
                article = Article(**source)
                article.set_id()
                articles.append(article)
                if add_journal_info:
                    journal_specs.append({
                        "title": "1",
                        "pissn": "1111-1111",
                        "eissn": "1111-1111"
                    })

                # one with a fulltext and no DOI
                source = ArticleFixtureFactory.make_article_source(
                    eissn="2222-2222",
                    pissn="2222-2222",
                    fulltext="http://example.com/2",
                    doi=False)
                del source["bibjson"]["journal"]
                article = Article(**source)
                article.set_id()
                articles.append(article)
                if add_journal_info:
                    journal_specs.append({
                        "title": "2",
                        "pissn": "2222-2222",
                        "eissn": "2222-2222"
                    })

                # another one with a fulltext and no DOI
                source = ArticleFixtureFactory.make_article_source(
                    eissn="3333-3333",
                    pissn="3333-3333",
                    fulltext="http://example.com/3",
                    doi=False)
                del source["bibjson"]["journal"]
                article = Article(**source)
                article.set_id()
                articles.append(article)
                if add_journal_info:
                    journal_specs.append({
                        "title": "3",
                        "pissn": "3333-3333",
                        "eissn": "3333-3333"
                    })

                # remember identifiers of the last clean article; used to make
                # the index-duplicate mock point at a real batch member
                last_issn = "3333-3333"
                last_doi = "10.123/abc/1"
                last_ft = "http://example.com/3"
                last_id = articles[-1].id

                if duplicate_in_batch:
                    # one with a duplicated DOI
                    source = ArticleFixtureFactory.make_article_source(
                        eissn="4444-4444",
                        pissn="4444-4444",
                        doi="10.123/abc/0",
                        fulltext="http://example.com/4")
                    del source["bibjson"]["journal"]
                    article = Article(**source)
                    article.set_id()
                    articles.append(article)
                    if add_journal_info:
                        journal_specs.append({
                            "title": "4",
                            "pissn": "4444-4444",
                            "eissn": "4444-4444"
                        })

                    # one with a duplicated Fulltext
                    source = ArticleFixtureFactory.make_article_source(
                        eissn="5555-5555",
                        pissn="5555-5555",
                        doi="10.123/abc/5",
                        fulltext="http://example.com/1")
                    del source["bibjson"]["journal"]
                    article = Article(**source)
                    article.set_id()
                    articles.append(article)
                    if add_journal_info:
                        journal_specs.append({
                            "title": "5",
                            "pissn": "5555-5555",
                            "eissn": "5555-5555"
                        })

        # ownership mock: full owner, owner of only the last ISSN, or neither
        ilo_mock = None
        if account_arg == "owner":
            ilo_mock = BLLArticleMockFactory.is_legitimate_owner(legit=True)
        elif account_arg == "own_1":
            ilo_mock = BLLArticleMockFactory.is_legitimate_owner(
                legit_on_issn=[last_issn])
        else:
            ilo_mock = BLLArticleMockFactory.is_legitimate_owner()
        self.svc.is_legitimate_owner = ilo_mock

        # duplicate-in-index mock (see duplicate_in_index comment above)
        gd_mock = None
        if duplicate_in_index == 1:
            gd_mock = BLLArticleMockFactory.get_duplicate(
                given_article_id=last_id,
                eissn=last_issn,
                pissn=last_issn,
                doi=last_doi,
                fulltext=last_ft)
        elif duplicate_in_index == 2:
            gd_mock = BLLArticleMockFactory.get_duplicate(merge_duplicate=True)
        else:
            gd_mock = BLLArticleMockFactory.get_duplicate(return_none=True)
        self.svc.get_duplicate = gd_mock

        ios_mock = BLLArticleMockFactory.issn_ownership_status([], [], [], [])
        self.svc.issn_ownership_status = ios_mock

        self.svc._doi_or_fulltext_updated = BLLArticleMockFactory.doi_or_fulltext_updated(
            False, False)

        if add_journal_info:
            # NOTE(review): this patches the Article *class*, not an instance —
            # it leaks into other tests unless torn down elsewhere; confirm.
            gj_mock = ModelArticleMockFactory.get_journal(journal_specs)
            Article.get_journal = gj_mock

        ###########################################################
        # Execution

        if raises is not None:
            with self.assertRaises(raises):
                try:
                    self.svc.batch_create_articles(articles, account,
                                                   duplicate_check,
                                                   merge_duplicate,
                                                   limit_to_account,
                                                   add_journal_info)
                except exceptions.IngestException as e:
                    # in the merge-conflict case (2) the report contents are
                    # not checked, only that the exception is raised
                    if duplicate_in_index != 2:
                        report = e.result
                        assert report["success"] == success
                        assert report["fail"] == fail
                        assert report["update"] == update
                        assert report["new"] == success - update
                    raise
        else:
            report = self.svc.batch_create_articles(articles, account,
                                                    duplicate_check,
                                                    merge_duplicate,
                                                    limit_to_account,
                                                    add_journal_info)

            # make sure all the articles are saved before running the asserts
            aids = [(a.id, a.last_updated) for a in articles]
            for aid, lu in aids:
                Article.block(aid, lu, sleep=0.05)

            assert report["success"] == success
            assert report["fail"] == fail
            assert report["update"] == update
            assert report["new"] == success - update

            if success > 0:
                all_articles = Article.all()
                # retry once: index refresh may lag the blocking save above
                if len(all_articles) != success:
                    time.sleep(0.5)
                    all_articles = Article.all()
                assert len(all_articles) == success
                for article in all_articles:
                    if add_journal_info:
                        assert article.bibjson().journal_title is not None
                    else:
                        assert article.bibjson().journal_title is None

            else:
                # there's nothing in the article index
                with self.assertRaises(ESMappingMissingError):
                    Article.all()
    def test_01_batch_create_article(self, name, kwargs):

        articles_arg = kwargs.get("articles")
        duplicate_in_batch_arg = kwargs.get("duplicate_in_batch")
        duplicate_in_index_arg = kwargs.get("duplicate_in_index")
        account_arg = kwargs.get("account")
        duplicate_check_arg = kwargs.get("duplicate_check")
        merge_duplicate_arg = kwargs.get("merge_duplicate")
        limit_to_account_arg = kwargs.get("limit_to_account")

        raises_arg = kwargs.get("raises")
        success_arg = kwargs.get("success")
        fail_arg = kwargs.get("fail")
        update_arg = kwargs.get("update")

        ###############################################
        ## set up

        success = int(success_arg)
        fail = int(fail_arg)
        update = int(update_arg)

        duplicate_in_batch = duplicate_in_batch_arg == "yes"
        duplicate_in_index = duplicate_in_index_arg == "yes"

        raises = EXCEPTIONS.get(raises_arg)

        duplicate_check = None
        if duplicate_check_arg != "none":
            duplicate_check = True if duplicate_check_arg == "true" else False

        merge_duplicate = None
        if merge_duplicate_arg != "none":
            merge_duplicate = True if merge_duplicate_arg == "true" else False

        limit_to_account = None
        if limit_to_account_arg != "none":
            limit_to_account = True if limit_to_account_arg == "true" else False

        account = None
        if account_arg != "none":
            source = AccountFixtureFactory.make_publisher_source()
            account = Account(**source)

        last_doi = None
        last_ft = None
        last_issn = None
        last_id = None
        articles = None
        if articles_arg != "none":
            articles = []
            if articles_arg == "yes":
                # one with a DOI and no fulltext
                source = ArticleFixtureFactory.make_article_source(
                    eissn="0000-0000",
                    pissn="0000-0000",
                    doi="10.123/abc/0",
                    fulltext=False)
                article = Article(**source)
                article.set_id()
                articles.append(article)

                # another with a DOI and no fulltext
                source = ArticleFixtureFactory.make_article_source(
                    eissn="1111-1111",
                    pissn="1111-1111",
                    doi="10.123/abc/1",
                    fulltext=False)
                article = Article(**source)
                article.set_id()
                articles.append(article)

                # one with a fulltext and no DOI
                source = ArticleFixtureFactory.make_article_source(
                    eissn="2222-2222",
                    pissn="2222-2222",
                    fulltext="http://example.com/2",
                    doi=False)
                article = Article(**source)
                article.set_id()
                articles.append(article)

                # another one with a fulltext and no DOI
                source = ArticleFixtureFactory.make_article_source(
                    eissn="3333-3333",
                    pissn="3333-3333",
                    fulltext="http://example.com/3",
                    doi=False)
                article = Article(**source)
                article.set_id()
                articles.append(article)

                last_issn = "3333-3333"
                last_doi = "10.123/abc/1"
                last_ft = "http://example.com/3"
                last_id = articles[-1].id

                if duplicate_in_batch:
                    # one with a duplicated DOI
                    source = ArticleFixtureFactory.make_article_source(
                        eissn="4444-4444",
                        pissn="4444-4444",
                        doi="10.123/abc/0",
                        fulltext="http://example.com/4")
                    article = Article(**source)
                    article.set_id()
                    articles.append(article)

                    # one with a duplicated Fulltext
                    source = ArticleFixtureFactory.make_article_source(
                        eissn="5555-5555",
                        pissn="5555-5555",
                        doi="10.123/abc/5",
                        fulltext="http://example.com/1")
                    article = Article(**source)
                    article.set_id()
                    articles.append(article)
                """
                article_count = int(articles_arg)
                for i in range(article_count):
                    idx = str(i)
                    if duplicate_in_batch:
                        if i < 2:
                            idx = "duplicate"
                    last_issn = str(i) * 4 + "-" + str(i) * 4
                    last_doi = "10.123/abc/" + idx
                    last_ft = "http://example.com/" + idx
                    source = ArticleFixtureFactory.make_article_source(eissn=last_issn, pissn=last_issn, doi=last_doi, fulltext=last_ft)
                    article = Article(**source)
                    article.set_id()
                    last_id = article.id
                    articles.append(article)
                """

        ilo_mock = None
        if account_arg == "owner":
            ilo_mock = BLLArticleMockFactory.is_legitimate_owner(legit=True)
        elif account_arg == "own_1":
            ilo_mock = BLLArticleMockFactory.is_legitimate_owner(
                legit_on_issn=[last_issn])
        else:
            ilo_mock = BLLArticleMockFactory.is_legitimate_owner()
        self.svc.is_legitimate_owner = ilo_mock

        gd_mock = None
        if duplicate_in_index:
            gd_mock = BLLArticleMockFactory.get_duplicate(
                given_article_id=last_id,
                eissn=last_issn,
                pissn=last_issn,
                doi=last_doi,
                fulltext=last_ft)
        else:
            gd_mock = BLLArticleMockFactory.get_duplicate(return_none=True)
        self.svc.get_duplicate = gd_mock

        ios_mock = BLLArticleMockFactory.issn_ownership_status([], [], [], [])
        self.svc.issn_ownership_status = ios_mock

        ###########################################################
        # Execution

        if raises is not None:
            with self.assertRaises(raises):
                try:
                    self.svc.batch_create_articles(articles, account,
                                                   duplicate_check,
                                                   merge_duplicate,
                                                   limit_to_account)
                except exceptions.IngestException as e:
                    report = e.result
                    assert report["success"] == success
                    assert report["fail"] == fail
                    assert report["update"] == update
                    assert report["new"] == success - update
                    raise
        else:
            report = self.svc.batch_create_articles(articles, account,
                                                    duplicate_check,
                                                    merge_duplicate,
                                                    limit_to_account)

            # make sure all the articles are saved before running the asserts
            aids = [(a.id, a.last_updated) for a in articles]
            for aid, lu in aids:
                Article.block(aid, lu, sleep=0.05)

            assert report["success"] == success
            assert report["fail"] == fail
            assert report["update"] == update
            assert report["new"] == success - update

            if success > 0:
                all_articles = Article.all()
                if len(all_articles) != success:
                    time.sleep(0.5)
                    all_articles = Article.all()
                assert len(all_articles) == success

            else:
                # there's nothing in the article index
                with self.assertRaises(ESMappingMissingError):
                    Article.all()
    def test_01_create_article(self, name, kwargs):
        """Parameterised test for ArticleService.create_article.

        ``kwargs`` drives the scenario: which article/account fixtures to
        build, which service flags to pass (duplicate_check, merge_duplicate,
        limit_to_account, add_journal_info, dry_run), and what outcome to
        expect (success count, raised exception, and whether the original
        and/or merged article records end up saved in the index).
        """
        article_arg = kwargs.get("article")
        article_duplicate_arg = kwargs.get("article_duplicate")
        account_arg = kwargs.get("account")
        duplicate_check_arg = kwargs.get("duplicate_check")
        merge_duplicate_arg = kwargs.get("merge_duplicate")
        limit_to_account_arg = kwargs.get("limit_to_account")
        add_journal_info_arg = kwargs.get("add_journal_info")
        dry_run_arg = kwargs.get("dry_run")

        raises_arg = kwargs.get("raises")
        success_arg = kwargs.get("success")
        original_saved_arg = kwargs.get("original_saved")
        merge_saved_arg = kwargs.get("merge_saved")

        ###############################################
        ## set up

        success = int(success_arg)

        # Each flag is tri-state: "none" means leave it as None so the service
        # applies its own default; otherwise coerce "true"/"false" to bool.
        duplicate_check = None
        if duplicate_check_arg != "none":
            duplicate_check = duplicate_check_arg == "true"

        merge_duplicate = None
        if merge_duplicate_arg != "none":
            merge_duplicate = merge_duplicate_arg == "true"

        limit_to_account = None
        if limit_to_account_arg != "none":
            limit_to_account = limit_to_account_arg == "true"

        add_journal_info = None
        if add_journal_info_arg != "none":
            add_journal_info = add_journal_info_arg == "true"

        dry_run = None
        if dry_run_arg != "none":
            dry_run = dry_run_arg == "true"

        raises = EXCEPTIONS.get(raises_arg)

        eissn = "1234-5678"
        pissn = "9876-5432"

        # When journal info is to be merged into the article, a journal with
        # matching ISSNs must already exist in the index.
        if add_journal_info:
            jsource = JournalFixtureFactory.make_journal_source(in_doaj=True)
            j = Journal(**jsource)
            bj = j.bibjson()
            bj.title = "Add Journal Info Title"
            bj.remove_identifiers()
            bj.add_identifier(bj.P_ISSN, pissn)
            bj.add_identifier(bj.E_ISSN, eissn)
            j.save(blocking=True)

        article = None
        original_id = None
        if article_arg == "exists":
            source = ArticleFixtureFactory.make_article_source(
                eissn=eissn,
                pissn=pissn,
                doi="10.123/abc/1",
                fulltext="http://example.com/1")
            # strip the fixture's journal metadata so add_journal_info has
            # something to contribute
            del source["bibjson"]["journal"]
            article = Article(**source)
            article.set_id()
            original_id = article.id

        account = None
        if account_arg != "none":
            source = AccountFixtureFactory.make_publisher_source()
            account = Account(**source)

        # Mock the ownership checks on the service under test
        legit = account_arg == "owner"
        ilo_mock = BLLArticleMockFactory.is_legitimate_owner(legit=legit)
        self.svc.is_legitimate_owner = ilo_mock

        owned = [eissn, pissn] if account_arg == "owner" else []
        shared = []
        unowned = [eissn] if account_arg == "not_owner" else []
        unmatched = [pissn] if account_arg == "not_owner" else []
        ios_mock = BLLArticleMockFactory.issn_ownership_status(
            owned, shared, unowned, unmatched)
        self.svc.issn_ownership_status = ios_mock

        # Mock the duplicate lookup to return either a matching article or None
        gd_mock = None
        if article_duplicate_arg == "yes":
            gd_mock = BLLArticleMockFactory.get_duplicate(
                eissn=eissn,
                pissn=pissn,
                doi="10.123/abc/1",
                fulltext="http://example.com/1")
        else:
            gd_mock = BLLArticleMockFactory.get_duplicate(return_none=True)
        self.svc.get_duplicate = gd_mock

        # capture the mock's duplicate up front so merge behaviour can be
        # asserted against its id after execution
        mock_article = self.svc.get_duplicate(article)

        ###########################################################
        # Execution

        if raises is not None:
            with self.assertRaises(raises):
                self.svc.create_article(article, account, duplicate_check,
                                        merge_duplicate, limit_to_account,
                                        add_journal_info, dry_run)
        else:
            report = self.svc.create_article(article, account, duplicate_check,
                                             merge_duplicate, limit_to_account,
                                             add_journal_info, dry_run)

            assert report["success"] == success

            # check that the article was saved and if it was saved that it was suitably merged
            if original_saved_arg == "yes":
                original = Article.pull(original_id)
                assert original is not None
                assert report["update"] == 0
            elif article is not None:
                original = Article.pull(original_id)
                assert original is None

            if merge_saved_arg == "yes":
                merged = Article.pull(mock_article.id)
                assert merged is not None
                assert report["update"] == 1
            elif mock_article is not None:
                merged = Article.pull(mock_article.id)
                assert merged is None

            if add_journal_info:
                assert article.bibjson(
                ).journal_title == "Add Journal Info Title"
# Esempio n. 29 (snippet separator from the example aggregator; vote count: 0)
 def all_articles(self):
     """Return every Article in the index matching this object's known ISSNs."""
     # local import avoids a circular dependency at module load time
     from portality.models import Article
     issns = self.known_issns()
     return Article.find_by_issns(issns)
    def test_01_issn_ownership_status(self, name, kwargs):
        """Parameterised test for ArticleService.issn_ownership_status.

        Builds an article with a configurable combination of e/p ISSNs, mocks
        Journal.find_by_issn to simulate which ISSNs the journal index knows
        about and who owns the corresponding journals, then checks each ISSN
        lands in exactly one of the four returned categories:
        owned / shared / unowned / unmatched.
        """
        article_arg = kwargs.get("article")
        owner_arg = kwargs.get("owner")
        article_eissn_arg = kwargs.get("article_eissn")
        article_pissn_arg = kwargs.get("article_pissn")
        seen_eissn_arg = kwargs.get("seen_eissn")
        seen_pissn_arg = kwargs.get("seen_pissn")
        journal_owner_arg = kwargs.get("journal_owner")

        raises_arg = kwargs.get("raises")

        raises = EXCEPTIONS.get(raises_arg)

        ###############################################
        ## set up

        owner = None
        if owner_arg != "none":
            owner = Account(**AccountFixtureFactory.make_publisher_source())

        owner_id = None
        if owner is not None:
            owner_id = owner.id

        # generate our incoming article
        article = None
        eissn = None
        pissn = None
        if article_arg == "exists":
            source = ArticleFixtureFactory.make_article_source()
            article = Article(**source)
            article.set_id()

            article.bibjson().remove_identifiers("pissn")
            if article_pissn_arg == "yes":
                pissn = "1234-5678"
                article.bibjson().add_identifier("pissn", pissn)

            article.bibjson().remove_identifiers("eissn")
            if article_eissn_arg == "yes":
                eissn = "9876-5432"
                article.bibjson().add_identifier("eissn", eissn)

        # the (eissn, pissn) pairs the mocked journal index "knows about"
        issns = []
        if eissn is not None and pissn is not None and seen_eissn_arg == "yes" and seen_pissn_arg == "yes":
            issns.append((eissn, pissn))
        if eissn is not None and seen_eissn_arg == "yes":
            issns.append((eissn, "4321-9876"))
            issns.append((eissn, None))
        if pissn is not None and seen_pissn_arg == "yes":
            issns.append(("6789-4321", pissn))
            issns.append((None, pissn))

        # which accounts own the journals found for those ISSNs
        owners = []
        if journal_owner_arg == "none":
            owners = [None]
        elif journal_owner_arg == "correct" and owner_id is not None:
            owners = [owner_id]
        elif journal_owner_arg == "incorrect":
            owners = ["randomowner"]
        elif journal_owner_arg == "mix" and owner_id is not None:
            owners.append(owner_id)
            owners.append("randomowner")
            owners.append(None)

        # NOTE(review): this monkeypatches the class attribute globally and is
        # never restored here -- presumably relies on per-test teardown; confirm.
        mock = ModelJournalMockFactory.find_by_issn(issns, owners)
        Journal.find_by_issn = mock

        ###########################################################
        # Execution

        svc = DOAJ.articleService()

        if raises is not None:
            with self.assertRaises(raises):
                svc.issn_ownership_status(article, owner_id)
        else:
            owned, shared, unowned, unmatched = svc.issn_ownership_status(
                article, owner_id)

            # owned: seen ISSNs whose journal belongs to this account
            owned_count = 0
            if seen_eissn_arg == "yes" and eissn is not None and journal_owner_arg == "correct":
                assert eissn in owned
                owned_count += 1
            elif eissn is not None:
                assert eissn not in owned

            if seen_pissn_arg == "yes" and pissn is not None and journal_owner_arg == "correct":
                assert pissn in owned
                owned_count += 1
            elif pissn is not None:
                assert pissn not in owned

            assert len(owned) == owned_count

            # shared: seen ISSNs owned by a mixture of accounts
            shared_count = 0
            if seen_eissn_arg == "yes" and eissn is not None and journal_owner_arg == "mix":
                assert eissn in shared
                shared_count += 1
            elif eissn is not None:
                assert eissn not in shared

            if seen_pissn_arg == "yes" and pissn is not None and journal_owner_arg == "mix":
                assert pissn in shared
                shared_count += 1
            elif pissn is not None:
                assert pissn not in shared

            assert len(shared) == shared_count

            # unowned: seen ISSNs whose journal belongs to someone else, or nobody
            unowned_count = 0
            if seen_eissn_arg == "yes" and eissn is not None and journal_owner_arg in ("incorrect", "none"):
                assert eissn in unowned
                unowned_count += 1
            elif eissn is not None:
                assert eissn not in unowned

            if seen_pissn_arg == "yes" and pissn is not None and journal_owner_arg in ("incorrect", "none"):
                assert pissn in unowned
                unowned_count += 1
            elif pissn is not None:
                assert pissn not in unowned

            assert len(unowned) == unowned_count

            # unmatched: article ISSNs the journal index does not know at all
            unmatched_count = 0
            if seen_eissn_arg == "no" and eissn is not None:
                assert eissn in unmatched
                unmatched_count += 1
            elif eissn is not None:
                assert eissn not in unmatched

            if seen_pissn_arg == "no" and pissn is not None:
                assert pissn in unmatched
                unmatched_count += 1
            elif pissn is not None:
                assert pissn not in unmatched

            assert len(unmatched) == unmatched_count