def query_for_vol(self, journalobj):
    # The journal object will already be the correct continuation, if the user provided sufficient detail.
    issns = journalobj.bibjson().issns()

    # If there's no way to get the wanted issns, give up, else run the query
    if issns is None:
        return None
    else:
        volume_query = deepcopy(TERMS_SEARCH)
        volume_query["size"] = 0

        issn_term = {"terms": {"index.issn.exact": issns}}
        volume_query["query"]["bool"]["must"].append(issn_term)

        vol_term = {"term": {"bibjson.journal.volume.exact": self.volume}}
        volume_query["query"]["bool"]["must"].append(vol_term)

        # And if there's an issue, query that too. Note, issue does not make sense on its own.
        if self.issue:
            iss_term = {"term": {"bibjson.journal.number.exact": self.issue}}
            volume_query["query"]["bool"]["must"].append(iss_term)

        app.logger.debug("OpenURL subsequent volume query to article: " + json.dumps(volume_query))
        return Article.query(q=volume_query)
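# A minimal usage sketch for query_for_vol. The OpenURLRequest constructor
# arguments and the journal lookup are assumptions about the surrounding
# module; the volume/issue values are illustrative only.
def example_volume_lookup(journal):
    request = OpenURLRequest(volume="17", issue="2")   # hypothetical constructor args
    results = request.query_for_vol(journal)          # ES response dict, or None if the journal has no ISSNs
    return results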
def wipe_emails(connection, batch_size=500):
    batch = []
    for a in esprit.tasks.scroll(connection, 'article', q=HAS_EMAIL_QUERY):
        # Create the article model
        article = Article(**a)
        # Use the DataObj prune to remove emails
        _ = article.bibjson(construct_silent_prune=True)
        batch.append(article.data)

        if len(batch) >= batch_size:
            esprit.raw.bulk(connection, 'article', batch, idkey='id')
            batch = []

    # Finish saving the final batch, guarding against an empty bulk request
    if batch:
        esprit.raw.bulk(connection, 'article', batch, idkey='id')
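# Usage sketch: connect and run the email-wiping scroll. The host and index
# name are illustrative; building the connection this way is an assumption
# about how the esprit library used above is configured.
if __name__ == "__main__":
    conn = esprit.raw.Connection("http://localhost:9200", "doaj")   # illustrative target
    wipe_emails(conn, batch_size=1000)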
def delete_selected(cls, query, articles=False, snapshot_journals=True, snapshot_articles=True):
    if articles:
        # list the issns of all the journals
        issns = cls.issns_by_query(query)

        # issue a delete request over all the articles by those issns
        from portality.models import Article
        Article.delete_by_issns(issns, snapshot=snapshot_articles)

    # snapshot the journal record
    if snapshot_journals:
        js = cls.iterate(query, page_size=1000)
        for j in js:
            j.snapshot()

    # finally issue a delete request against the journals
    cls.delete_by_query(query)
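# Usage sketch: snapshot and delete all journals (plus their articles) matched
# by a query. The query body is a plain ES dict; the field name and owner
# value are illustrative, not confirmed parts of the schema.
def example_delete_for_owner(owner):
    q = {"query": {"term": {"admin.owner.exact": owner}}}
    Journal.delete_selected(q, articles=True, snapshot_journals=True, snapshot_articles=True)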
def article_stats(self):
    from portality.models import Article

    q = ArticleStatsQuery(self.known_issns())
    data = Article.query(q=q.query())
    hits = data.get("hits", {})
    total = hits.get("total", 0)
    latest = None
    if total > 0:
        latest = hits.get("hits", [])[0].get("_source").get("created_date")
    return {
        "total": total,
        "latest": latest
    }
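# Usage sketch (assumes `journal` is the model instance carrying this method,
# with its articles already indexed):
def example_article_stats(journal):
    stats = journal.article_stats()
    return "{} articles, latest created {}".format(stats["total"], stats["latest"])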
def query_es(self):
    """
    Query Elasticsearch for a set of matches for this request.
    :return: The results of a query through the dao, a JSON object.
    """

    # Copy to the template, which will be populated with terms
    populated_query = deepcopy(TERMS_SEARCH)

    # Get all of the attributes with values set.
    set_attributes = [(x, getattr(self, x)) for x in JOURNAL_SCHEMA_KEYS[:-1] if getattr(self, x)]

    # If we don't have a genre, guess journal FIXME: is it correct to assume journal?
    if not self.genre:
        self.genre = SUPPORTED_GENRES[0]    # TODO: we may want to handle 404 instead

    # Set i to use either our mapping for journals or articles
    i = SUPPORTED_GENRES.index(getattr(self, 'genre').lower())

    # Add the attributes to the query
    for (k, v) in set_attributes:
        es_term = OPENURL_TO_ES[k][i]
        if es_term is None:
            continue
        else:
            term = {"term": {es_term: v}}
            populated_query["query"]["bool"]["must"].append(term)

    # avoid doing an empty query
    if len(populated_query["query"]["bool"]["must"]) == 0:
        app.logger.debug("No valid search terms in OpenURL object")
        return None

    # Return the results of the query
    if i == 0:
        app.logger.debug("OpenURL query to journal: " + json.dumps(populated_query))
        return Journal.query(q=populated_query)
    elif i == 1:
        app.logger.debug("OpenURL query to article: " + json.dumps(populated_query))
        return Article.query(q=populated_query)
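# For orientation, a hypothetical fragment showing the shape of the mapping
# this method relies on: OPENURL_TO_ES maps each OpenURL attribute to a pair
# of ES field names indexed by genre (0 = journal, 1 = article), with None
# meaning the attribute is not searchable for that genre. The field names
# below are illustrative, not confirmed parts of the real mapping.
OPENURL_TO_ES_EXAMPLE = {
    "jtitle": ("index.title.exact", "bibjson.journal.title.exact"),
    "doi": (None, "index.doi.exact"),
}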
def setUp(self):
    super(TestCreateOrUpdateArticle, self).setUp()

    self.publisher = Account()
    self.publisher.add_role("publisher")
    self.publisher.save(blocking=True)

    self.admin = Account()
    self.admin.add_role("admin")
    self.admin.save(blocking=True)

    sources = JournalFixtureFactory.make_many_journal_sources(2, True)
    self.journal1 = Journal(**sources[0])
    self.journal1.set_owner(self.publisher.id)
    jbib1 = self.journal1.bibjson()
    jbib1.add_identifier(jbib1.P_ISSN, "1111-1111")
    jbib1.add_identifier(jbib1.E_ISSN, "2222-2222")
    self.journal1.save(blocking=True)

    self.publisher.add_journal(self.journal1)

    self.journal2 = Journal(**sources[1])
    jbib2 = self.journal2.bibjson()
    jbib2.add_identifier(jbib2.P_ISSN, "1234-5678")
    jbib2.add_identifier(jbib2.E_ISSN, "9876-5432")
    self.journal2.save(blocking=True)

    self.article10 = Article(**ArticleFixtureFactory.make_article_source(
        pissn="1111-1111", eissn="2222-2222", doi="10.0000/article-10",
        fulltext="https://www.article10.com"))
    self.article10.set_id("articleid10")
    self.article10.save(blocking=True)

    self.article11 = Article(**ArticleFixtureFactory.make_article_source(
        pissn="1111-1111", eissn="2222-2222", doi="10.0000/article-11",
        fulltext="https://www.article11.com"))
    self.article11.set_id("articleid11")
    self.article11.save(blocking=True)

    self.article2 = Article(**ArticleFixtureFactory.make_article_source(
        pissn="1234-5678", eissn="9876-5432", doi="10.0000/article-2",
        fulltext="https://www.article2.com"))
    self.article2.set_id("articleid2")
    self.article2.save(blocking=True)
def test_04_old_doi_new_url(self):
    ba = self.article10.bibjson()
    ba.remove_urls(ba.FULLTEXT)
    ba.add_url("https://updated.com", ba.FULLTEXT)

    # try as an admin
    resp = ArticleService.create_article(self=ArticleService(),
                                         account=self.admin,
                                         article=self.article10,
                                         update_article_id=self.article10.id)
    assert resp["success"] == 1, "expected 1 updated, received: {}".format(resp)
    assert resp["update"] == 1, "expected 1 updated, received: {}".format(resp)
    assert resp["new"] == 0, "expected 0 new, received: {}".format(resp)

    art = Article.pull(self.article10.id)
    assert art.get_normalised_fulltext() == "//updated.com", \
        "expected //updated.com, received: {}".format(art.get_normalised_fulltext())
def test_00_no_doi_and_url_changed(self):
    ba = self.article10.bibjson()
    ba.title = "Updated Article"

    # try for admin
    resp = ArticleService.create_article(self=ArticleService(),
                                         account=self.admin,
                                         article=self.article10,
                                         update_article_id=self.article10.id)
    assert resp["success"] == 1, "expected 1 updated, received: {}".format(resp)
    assert resp["update"] == 1, "expected 1 updated, received: {}".format(resp)
    assert resp["new"] == 0, "expected 0 new, received: {}".format(resp)

    a = Article.pull(self.article10.id)
    assert a.bibjson().title == "Updated Article", \
        "Expected `Updated Article`, received: {}".format(a.bibjson().title)
def test_05_new_doi_old_url(self):
    ba = self.article10.bibjson()
    ba.remove_identifiers(ba.DOI)
    ba.add_identifier(ba.DOI, "10.0000/article-UPDATED")

    # try as an admin
    resp = ArticleService.create_article(self=ArticleService(),
                                         account=self.admin,
                                         article=self.article10,
                                         update_article_id=self.article10.id)
    assert resp["success"] == 1, "expected 1 updated, received: {}".format(resp)
    assert resp["update"] == 1, "expected 1 updated, received: {}".format(resp)
    assert resp["new"] == 0, "expected 0 new, received: {}".format(resp)

    art = Article.pull(self.article10.id)
    assert art.get_normalised_doi() == "10.0000/article-UPDATED", \
        "expected 10.0000/article-UPDATED, received: {}".format(art.get_normalised_doi())
def test_is_acceptable(self, value, kwargs):
    doi_arg = kwargs.get("doi")
    ft_arg = kwargs.get("fulltext_url")
    is_acceptable_arg = kwargs.get("is_acceptable")

    is_acceptable = is_acceptable_arg == "yes"
    doi = "10.1234/article-10" if doi_arg == "exists" else None
    ft = "https://example.com" if ft_arg == "exists" else None

    article_source = ArticleFixtureFactory.make_article_source()
    article = Article(**article_source)
    if doi is None:
        article.bibjson().remove_identifiers("doi")
    if ft is None:
        article.bibjson().remove_urls("fulltext")

    if is_acceptable:
        self.assertIsNone(self.svc.is_acceptable(article))
    else:
        with self.assertRaises(exceptions.ArticleNotAcceptable):
            self.svc.is_acceptable(article)
def test_01_is_legitimate_owner(self, name, kwargs):
    article_arg = kwargs.get("article")
    owner_arg = kwargs.get("owner")
    article_eissn_arg = kwargs.get("article_eissn")
    article_pissn_arg = kwargs.get("article_pissn")
    seen_eissn_arg = kwargs.get("seen_eissn")
    seen_pissn_arg = kwargs.get("seen_pissn")
    journal_owner_arg = kwargs.get("journal_owner")
    raises_arg = kwargs.get("raises")
    legit_arg = kwargs.get("legit")

    raises = EXCEPTIONS.get(raises_arg)

    ###############################################
    ## set up

    owner = None
    if owner_arg != "none":
        owner = Account(**AccountFixtureFactory.make_publisher_source())

    owner_id = None
    if owner is not None:
        owner_id = owner.id

    # generate our incoming article
    article = None
    eissn = None
    pissn = None
    if article_arg == "exists":
        source = ArticleFixtureFactory.make_article_source()
        article = Article(**source)
        article.set_id()

        article.bibjson().remove_identifiers("pissn")
        if article_pissn_arg == "yes":
            pissn = "1234-5678"
            article.bibjson().add_identifier("pissn", pissn)

        article.bibjson().remove_identifiers("eissn")
        if article_eissn_arg == "yes":
            eissn = "9876-5432"
            article.bibjson().add_identifier("eissn", eissn)

    # assemble the issns that will appear to be in the index. One that is irrelevant, and just
    # serves to be "noise" in the database, and the other that matches the spec required by
    # the test
    issns = [("1111-1111", "2222-2222")]
    if eissn is not None and pissn is not None and seen_eissn_arg == "yes" and seen_pissn_arg == "yes":
        issns.append((eissn, pissn))
    if eissn is not None and seen_eissn_arg == "yes":
        issns.append((eissn, None))
    if pissn is not None and seen_pissn_arg == "yes":
        issns.append((None, pissn))

    owners = []
    if journal_owner_arg == "none":
        owners = [None]
    elif journal_owner_arg == "correct" and owner_id is not None:
        owners = [owner_id]
    elif journal_owner_arg == "incorrect":
        owners = ["randomowner"]
    elif journal_owner_arg == "mix" and owner_id is not None:
        owners.append(owner_id)
        owners.append("randomowner")
        owners.append(None)

    mock = ModelJournalMockFactory.find_by_issn(issns, owners)
    Journal.find_by_issn = mock

    ###########################################################
    # Execution

    svc = DOAJ.articleService()
    if raises is not None:
        with self.assertRaises(raises):
            svc.is_legitimate_owner(article, owner_id)
    else:
        legit = svc.is_legitimate_owner(article, owner_id)

        if legit_arg == "no":
            assert legit is False
        elif legit_arg == "yes":
            assert legit is True
def test_01_discover_duplicates(self, name, kwargs):
    article_arg = kwargs.get("article")
    owner_arg = kwargs.get("owner")
    article_doi_arg = kwargs.get("article_doi")
    doi_duplicate_arg = kwargs.get("doi_duplicate")
    article_fulltext_arg = kwargs.get("article_fulltext")
    fulltext_duplicate_arg = kwargs.get("fulltext_duplicate")
    articles_by_doi_arg = kwargs.get("articles_by_doi")
    articles_by_fulltext_arg = kwargs.get("articles_by_fulltext")
    raises_arg = kwargs.get("raises")

    raises = EXCEPTIONS.get(raises_arg)

    ###############################################
    ## set up

    owner = None
    if owner_arg != "none":
        owner = Account(**AccountFixtureFactory.make_publisher_source())

    owner_id = None
    if owner is not None:
        owner_id = owner.id

    # create a journal for the owner
    if owner_arg not in ["none"]:
        source = JournalFixtureFactory.make_journal_source(in_doaj=True)
        journal = Journal(**source)
        journal.set_owner(owner.id)
        journal.bibjson().remove_identifiers()
        journal.bibjson().add_identifier("eissn", "1234-5678")
        journal.bibjson().add_identifier("pissn", "9876-5432")
        journal.save()

    # determine what we need to load into the index
    article_ids = []
    aids_block = []
    if owner_arg not in ["none", "no_articles"]:
        for i, ident in enumerate(IDENTS):
            the_doi = ident["doi"]
            if doi_duplicate_arg == "padded":
                the_doi = " " + the_doi + " "
            elif doi_duplicate_arg == "prefixed":
                the_doi = "https://dx.doi.org/" + the_doi

            the_fulltext = ident["fulltext"]
            if article_fulltext_arg != "invalid":
                if fulltext_duplicate_arg == "padded":
                    the_fulltext = " http:" + the_fulltext
                elif fulltext_duplicate_arg == "http":
                    the_fulltext = "http:" + the_fulltext
                elif fulltext_duplicate_arg == "https":
                    the_fulltext = "https:" + the_fulltext
                else:
                    the_fulltext = "http:" + the_fulltext

            source = ArticleFixtureFactory.make_article_source(eissn="1234-5678", pissn="9876-5432",
                                                               doi=the_doi, fulltext=the_fulltext)
            article = Article(**source)
            article.set_id()
            article.save(blocking=True)
            article_ids.append(article.id)
            aids_block.append((article.id, article.last_updated))

    # generate our incoming article
    article = None
    doi = None
    fulltext = None
    if article_arg == "yes":
        eissn = "1234-5678"  # one matching
        pissn = "6789-1234"  # the other not - issn matches are not relevant to this test

        if article_doi_arg in ["yes", "padded"]:
            doi = "10.1234/abc/11"
            if doi_duplicate_arg in ["yes", "padded"]:
                doi = IDENTS[0]["doi"]
            if article_doi_arg == "padded":
                doi = " doi:" + doi + " "
        elif article_doi_arg in ["invalid"]:
            doi = IDENTS[-1]["doi"]

        if article_fulltext_arg in ["yes", "padded", "https"]:
            fulltext = "//example.com/11"
            if fulltext_duplicate_arg in ["yes", "padded", "https"]:
                fulltext = IDENTS[0]["fulltext"]
            if fulltext_duplicate_arg == "padded":
                fulltext = " http:" + fulltext + " "
            elif fulltext_duplicate_arg == "https":
                fulltext = "https:" + fulltext
            else:
                fulltext = "http:" + fulltext
        elif article_fulltext_arg == "invalid":
            fulltext = IDENTS[-1]["fulltext"]

        source = ArticleFixtureFactory.make_article_source(eissn=eissn, pissn=pissn,
                                                           doi=doi, fulltext=fulltext)
        article = Article(**source)

        # we need to do this if doi or fulltext are none, because the factory will set a default if we don't
        # provide them
        if doi is None:
            article.bibjson().remove_identifiers("doi")
        if fulltext is None:
            article.bibjson().remove_urls("fulltext")

        article.set_id()

    Article.blockall(aids_block)

    ###########################################################
    # Execution

    svc = DOAJ.articleService()
    if raises is not None:
        with self.assertRaises(raises):
            svc.discover_duplicates(article)
    else:
        possible_articles = svc.discover_duplicates(article)

        if articles_by_doi_arg == "yes":
            assert "doi" in possible_articles
            assert len(possible_articles["doi"]) == 1
            # if this is the "invalid" doi, then we expect it to match the final article, otherwise match the first
            if article_doi_arg == "invalid":
                assert possible_articles["doi"][0].id == article_ids[-1]
            else:
                assert possible_articles["doi"][0].id == article_ids[0]
        else:
            if possible_articles is not None:
                assert "doi" not in possible_articles

        if articles_by_fulltext_arg == "yes":
            assert "fulltext" in possible_articles
            assert len(possible_articles["fulltext"]) == 1
            # if this is the "invalid" fulltext url, then we expect it to match the final article, otherwise match the first
            if article_fulltext_arg == "invalid":
                assert possible_articles["fulltext"][0].id == article_ids[-1]
            else:
                assert possible_articles["fulltext"][0].id == article_ids[0]
        else:
            if possible_articles is not None:
                assert "fulltext" not in possible_articles
def articles(self):
    return [Article(**a) for a in self.data.get("articles", [])]
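# Usage sketch (assumes this accessor lives on a container model whose raw
# `data` dict holds a list of article records under "articles"):
def example_list_titles(container):
    return [a.bibjson().title for a in container.articles()]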
def migrate_articles(source, batch_size=5000):
    # read in the content
    f = open(source)
    xml = etree.parse(f)
    f.close()
    articles = xml.getroot()
    print("migrating", str(len(articles)), "article records from", source)

    batch = []
    for element in articles:
        a = Article()
        b = _to_article_bibjson(element)
        a.set_bibjson(b)
        a.set_created(_created_date(element))
        a.set_id()
        batch.append(a.data)

        if len(batch) >= batch_size:
            Article.bulk(batch, refresh=True)
            del batch[:]

    if len(batch) > 0:
        Article.bulk(batch)
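# Usage sketch: run the migration against an XML dump on disk. The filename
# and batch size are illustrative; _to_article_bibjson and _created_date are
# module-level helpers assumed to exist alongside this function.
if __name__ == "__main__":
    migrate_articles("doaj_articles_dump.xml", batch_size=1000)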
def test_01_get_duplicates(self, name, kwargs):
    article_arg = kwargs.get("article")
    owner_arg = kwargs.get("owner")
    doi_duplicates_arg = kwargs.get("doi_duplicates")
    fulltext_duplicates_arg = kwargs.get("fulltext_duplicates")
    overlap_arg = kwargs.get("overlap")
    raises_arg = kwargs.get("raises")

    raises = EXCEPTIONS.get(raises_arg)

    doi_duplicates = -1
    if doi_duplicates_arg not in ["-"]:
        doi_duplicates = int(doi_duplicates_arg)

    fulltext_duplicates = -1
    if fulltext_duplicates_arg not in ["-"]:
        fulltext_duplicates = int(fulltext_duplicates_arg)

    overlap = -1
    if overlap_arg not in ["-"]:
        overlap = int(overlap_arg)

    expected_count = doi_duplicates + fulltext_duplicates - overlap

    ###############################################
    ## set up

    owner = None
    if owner_arg != "no":
        owner = Account(**AccountFixtureFactory.make_publisher_source())

    owner_id = None
    if owner is not None:
        owner_id = owner.id

    # generate our incoming article
    article = None
    if article_arg == "yes":
        source = ArticleFixtureFactory.make_article_source()
        article = Article(**source)
        article.set_id()

    mock = BLLArticleMockFactory.discover_duplicates(doi_duplicates, fulltext_duplicates, overlap)
    self.svc.discover_duplicates = mock

    # determine if we expect a merge conflict
    dds = 0 if doi_duplicates < 0 else doi_duplicates
    fds = 0 if fulltext_duplicates < 0 else fulltext_duplicates
    ol = 0 if overlap < 0 else overlap
    expect_merge_conflict = dds + fds - ol > 1

    ###########################################################
    # Execution

    first_article = None

    # first do get_duplicates
    if raises is not None:
        with self.assertRaises(raises):
            self.svc.get_duplicates(article)
    else:
        duplicates = self.svc.get_duplicates(article)
        if len(duplicates) > 0:
            first_article = duplicates[0]

        # check that we have the number of results we expected
        assert len(duplicates) == expected_count

        # check that the articles are unique in the list
        article_ids = [a.id for a in duplicates]
        article_ids.sort()
        deduped = list(set(article_ids))
        deduped.sort()  # so it's comparable to the article_ids list, as the set() call destroys ordering
        assert article_ids == deduped  # i.e. that there were no duplicates

        # check that the articles are ordered by last_updated
        last_updateds = [datetime.strptime(a.last_updated, "%Y-%m-%dT%H:%M:%SZ") for a in duplicates]
        sorted_lu = sorted(last_updateds, reverse=True)
        assert sorted_lu == last_updateds  # i.e. they were already sorted

    # then the same again on the singular get_duplicate
    if raises is not None:
        with self.assertRaises(raises):
            self.svc.get_duplicate(article)
    elif expect_merge_conflict:
        with self.assertRaises(exceptions.ArticleMergeConflict):
            self.svc.get_duplicate(article)
    else:
        duplicate = self.svc.get_duplicate(article)
        if expected_count > 0:
            assert isinstance(duplicate, Article)
            assert duplicate.id == first_article.id
        else:
            assert duplicate is None
def test_01_create_article(self, name, kwargs):
    article_arg = kwargs.get("article")
    article_duplicate_arg = kwargs.get("article_duplicate")
    account_arg = kwargs.get("account")
    duplicate_check_arg = kwargs.get("duplicate_check")
    merge_duplicate_arg = kwargs.get("merge_duplicate")
    limit_to_account_arg = kwargs.get("limit_to_account")
    add_journal_info_arg = kwargs.get("add_journal_info")
    dry_run_arg = kwargs.get("dry_run")
    raises_arg = kwargs.get("raises")
    success_arg = kwargs.get("success")
    original_saved_arg = kwargs.get("original_saved")
    merge_saved_arg = kwargs.get("merge_saved")

    ###############################################
    ## set up

    success = int(success_arg)

    duplicate_check = None
    if duplicate_check_arg != "none":
        duplicate_check = duplicate_check_arg == "true"

    merge_duplicate = None
    if merge_duplicate_arg != "none":
        merge_duplicate = merge_duplicate_arg == "true"

    limit_to_account = None
    if limit_to_account_arg != "none":
        limit_to_account = limit_to_account_arg == "true"

    add_journal_info = None
    if add_journal_info_arg != "none":
        add_journal_info = add_journal_info_arg == "true"

    dry_run = None
    if dry_run_arg != "none":
        dry_run = dry_run_arg == "true"

    raises = EXCEPTIONS.get(raises_arg)

    eissn = "1234-5678"
    pissn = "9876-5432"

    if add_journal_info:
        jsource = JournalFixtureFactory.make_journal_source(in_doaj=True)
        j = Journal(**jsource)
        bj = j.bibjson()
        bj.title = "Add Journal Info Title"
        bj.remove_identifiers()
        bj.add_identifier(bj.P_ISSN, pissn)
        bj.add_identifier(bj.E_ISSN, eissn)
        j.save(blocking=True)

    article = None
    original_id = None
    if article_arg == "exists":
        source = ArticleFixtureFactory.make_article_source(eissn=eissn, pissn=pissn,
                                                           doi="10.123/abc/1",
                                                           fulltext="http://example.com/1")
        del source["bibjson"]["journal"]
        article = Article(**source)
        article.set_id()
        original_id = article.id

    account = None
    if account_arg != "none":
        source = AccountFixtureFactory.make_publisher_source()
        account = Account(**source)

    legit = account_arg == "owner"
    ilo_mock = BLLArticleMockFactory.is_legitimate_owner(legit=legit)
    self.svc.is_legitimate_owner = ilo_mock

    owned = [eissn, pissn] if account_arg == "owner" else []
    shared = []
    unowned = [eissn] if account_arg == "not_owner" else []
    unmatched = [pissn] if account_arg == "not_owner" else []
    ios_mock = BLLArticleMockFactory.issn_ownership_status(owned, shared, unowned, unmatched)
    self.svc.issn_ownership_status = ios_mock

    gd_mock = None
    if article_duplicate_arg == "yes":
        gd_mock = BLLArticleMockFactory.get_duplicate(eissn=eissn, pissn=pissn,
                                                      doi="10.123/abc/1",
                                                      fulltext="http://example.com/1")
    else:
        gd_mock = BLLArticleMockFactory.get_duplicate(return_none=True)
    self.svc.get_duplicate = gd_mock

    mock_article = self.svc.get_duplicate(article)

    ###########################################################
    # Execution

    if raises is not None:
        with self.assertRaises(raises):
            self.svc.create_article(article, account, duplicate_check, merge_duplicate,
                                    limit_to_account, add_journal_info, dry_run)
    else:
        report = self.svc.create_article(article, account, duplicate_check, merge_duplicate,
                                         limit_to_account, add_journal_info, dry_run)

        assert report["success"] == success

        # check that the article was saved and if it was saved that it was suitably merged
        if original_saved_arg == "yes":
            original = Article.pull(original_id)
            assert original is not None
            assert report["update"] == 0
        elif article is not None:
            original = Article.pull(original_id)
            assert original is None

        if merge_saved_arg == "yes":
            merged = Article.pull(mock_article.id)
            assert merged is not None
            assert report["update"] == 1
        elif mock_article is not None:
            merged = Article.pull(mock_article.id)
            assert merged is None

        if add_journal_info:
            assert article.bibjson().journal_title == "Add Journal Info Title"
class TestCreateOrUpdateArticle(DoajTestCase):

    def setUp(self):
        super(TestCreateOrUpdateArticle, self).setUp()

        self.publisher = Account()
        self.publisher.add_role("publisher")
        self.publisher.save(blocking=True)

        self.admin = Account()
        self.admin.add_role("admin")
        self.admin.save(blocking=True)

        sources = JournalFixtureFactory.make_many_journal_sources(2, True)
        self.journal1 = Journal(**sources[0])
        self.journal1.set_owner(self.publisher.id)
        jbib1 = self.journal1.bibjson()
        jbib1.add_identifier(jbib1.P_ISSN, "1111-1111")
        jbib1.add_identifier(jbib1.E_ISSN, "2222-2222")
        self.journal1.save(blocking=True)

        self.publisher.add_journal(self.journal1)

        self.journal2 = Journal(**sources[1])
        jbib2 = self.journal2.bibjson()
        jbib2.add_identifier(jbib2.P_ISSN, "1234-5678")
        jbib2.add_identifier(jbib2.E_ISSN, "9876-5432")
        self.journal2.save(blocking=True)

        self.article10 = Article(**ArticleFixtureFactory.make_article_source(
            pissn="1111-1111", eissn="2222-2222", doi="10.0000/article-10",
            fulltext="https://www.article10.com"))
        self.article10.set_id("articleid10")
        self.article10.save(blocking=True)

        self.article11 = Article(**ArticleFixtureFactory.make_article_source(
            pissn="1111-1111", eissn="2222-2222", doi="10.0000/article-11",
            fulltext="https://www.article11.com"))
        self.article11.set_id("articleid11")
        self.article11.save(blocking=True)

        self.article2 = Article(**ArticleFixtureFactory.make_article_source(
            pissn="1234-5678", eissn="9876-5432", doi="10.0000/article-2",
            fulltext="https://www.article2.com"))
        self.article2.set_id("articleid2")
        self.article2.save(blocking=True)

    def tearDown(self):
        super(TestCreateOrUpdateArticle, self).tearDown()

    def test_00_no_doi_and_url_changed(self):
        ba = self.article10.bibjson()
        ba.title = "Updated Article"

        # try for admin
        resp = ArticleService.create_article(self=ArticleService(),
                                             account=self.admin,
                                             article=self.article10,
                                             update_article_id=self.article10.id)
        assert resp["success"] == 1, "expected 1 updated, received: {}".format(resp)
        assert resp["update"] == 1, "expected 1 updated, received: {}".format(resp)
        assert resp["new"] == 0, "expected 0 new, received: {}".format(resp)
        assert self.article10.bibjson().title == "Updated Article", \
            "Expected `Updated Article`, received: {}".format(self.article10.bibjson().title)

        ba.title = "Updated 2nd time"

        # try for publisher
        resp = ArticleService.create_article(self=ArticleService(),
                                             account=self.publisher,
                                             article=self.article10)
        assert resp["success"] == 1, "expected 1 updated, received: {}".format(resp)
        assert resp["update"] == 1, "expected 1 updated, received: {}".format(resp)
        assert resp["new"] == 0, "expected 0 new, received: {}".format(resp)
        assert self.article10.bibjson().title == "Updated 2nd time", \
            "Expected `Updated 2nd time`, received: {}".format(self.article10.bibjson().title)

    def test_01_new_doi_new_url(self):
        ba = self.article10.bibjson()
        ba.remove_identifiers(ba.DOI)
        ba.remove_urls(ba.FULLTEXT)
        ba.add_identifier(ba.DOI, "10.0000/NEW")
        ba.add_url(ba.FULLTEXT, "https://www.UPDATED.com")

        # for publisher
        resp = ArticleService.create_article(self=ArticleService(),
                                             account=self.publisher,
                                             article=self.article10)
        assert resp["success"] == 1, "expected 1 new, received: {}".format(resp)
        assert resp["update"] == 0, "expected 0 updated, received: {}".format(resp)
        assert resp["new"] == 1, "expected 1 new, received: {}".format(resp)

        # for admin
        resp = ArticleService.create_article(self=ArticleService(),
                                             account=self.admin,
                                             article=self.article10,
                                             update_article_id=self.article10.id)
        assert resp["success"] == 1, "expected 1 updated, received: {}".format(resp)
        assert resp["update"] == 1, "expected 1 updated, received: {}".format(resp)
        assert resp["new"] == 0, "expected 0 new, received: {}".format(resp)

    def test_02_old_doi_existing_url_admin(self):
        ba = self.article10.bibjson()
        ba.remove_urls(ba.FULLTEXT)
        # check for url from other article owned by the same publisher
        ba.add_url(self.article11.bibjson().get_single_url(ba.FULLTEXT), ba.FULLTEXT)

        # try as a publisher
        with self.assertRaises(ArticleMergeConflict):
            ArticleService.create_article(self=ArticleService(),
                                          account=self.publisher,
                                          article=self.article10)

        # try as an admin
        with self.assertRaises(ArticleMergeConflict):
            ArticleService.create_article(self=ArticleService(),
                                          account=self.admin,
                                          article=self.article10,
                                          update_article_id=self.article10.id)

        # check for url from other article owned by someone else
        ba.remove_urls(ba.FULLTEXT)
        ba.add_url(self.article2.bibjson().get_single_url(ba.FULLTEXT), ba.FULLTEXT)

        # try as a publisher
        with self.assertRaises(ArticleMergeConflict):
            ArticleService.create_article(self=ArticleService(),
                                          account=self.publisher,
                                          article=self.article10)

        # try as an admin
        with self.assertRaises(ArticleMergeConflict):
            ArticleService.create_article(self=ArticleService(),
                                          account=self.admin,
                                          article=self.article10,
                                          update_article_id=self.article10.id)

    def test_03_existing_doi_old_url_admin(self):
        ba = self.article10.bibjson()
        ba.remove_identifiers(ba.DOI)
        # check for DOI from other article owned by the same publisher
        ba.add_identifier(ba.DOI, "10.0000/article-11")

        # try as a publisher
        with self.assertRaises(ArticleMergeConflict):
            ArticleService.create_article(self=ArticleService(),
                                          account=self.publisher,
                                          article=self.article10)

        # try as an admin
        with self.assertRaises(ArticleMergeConflict):
            ArticleService.create_article(self=ArticleService(),
                                          account=self.admin,
                                          article=self.article10,
                                          update_article_id=self.article10.id)

        ba.remove_identifiers(ba.DOI)
        # check for DOI from other article owned by someone else
        ba.add_identifier(ba.DOI, "10.0000/article-2")

        # try as a publisher
        with self.assertRaises(ArticleMergeConflict):
            ArticleService.create_article(self=ArticleService(),
                                          account=self.publisher,
                                          article=self.article10)

        # try as an admin
        with self.assertRaises(ArticleMergeConflict):
            ArticleService.create_article(self=ArticleService(),
                                          account=self.admin,
                                          article=self.article10,
                                          update_article_id=self.article10.id)

    def test_04_old_doi_new_url(self):
        ba = self.article10.bibjson()
        ba.remove_urls(ba.FULLTEXT)
        ba.add_url("https://updated.com", ba.FULLTEXT)

        # try as publisher
        with self.assertRaises(DuplicateArticleException):
            ArticleService.create_article(self=ArticleService(),
                                          account=self.publisher,
                                          article=self.article10)

        # try as an admin
        resp = ArticleService.create_article(self=ArticleService(),
                                             account=self.admin,
                                             article=self.article10,
                                             update_article_id=self.article10.id)
        assert resp["success"] == 1, "expected 1 updated, received: {}".format(resp)
        assert resp["update"] == 1, "expected 1 updated, received: {}".format(resp)
        assert resp["new"] == 0, "expected 0 new, received: {}".format(resp)
        assert self.article10.get_normalised_fulltext() == "//updated.com", \
            "expected //updated.com, received: {}".format(self.article10.get_normalised_fulltext())

    def test_05_new_doi_old_url(self):
        ba = self.article10.bibjson()
        ba.remove_identifiers(ba.DOI)
        ba.add_identifier(ba.DOI, "10.0000/article-UPDATED")

        # try as publisher
        with self.assertRaises(DuplicateArticleException):
            ArticleService.create_article(self=ArticleService(),
                                          account=self.publisher,
                                          article=self.article10)

        # try as an admin
        resp = ArticleService.create_article(self=ArticleService(),
                                             account=self.admin,
                                             article=self.article10,
                                             update_article_id=self.article10.id)
        assert resp["success"] == 1, "expected 1 updated, received: {}".format(resp)
        assert resp["update"] == 1, "expected 1 updated, received: {}".format(resp)
        assert resp["new"] == 0, "expected 0 new, received: {}".format(resp)
        assert self.article10.get_normalised_doi() == "10.0000/article-UPDATED", \
            "expected 10.0000/article-UPDATED, received: {}".format(self.article10.get_normalised_doi())

    def test_06_existing_doi_new_url(self):
        ba = self.article10.bibjson()
        ba.remove_urls(ba.FULLTEXT)
        ba.add_url("https://updated.com", ba.FULLTEXT)
        # check for doi from other article of the same publisher
        ba.remove_identifiers(ba.DOI)
        ba.add_identifier(ba.DOI, self.article11.bibjson().get_one_identifier(ba.DOI))

        # try as publisher
        with self.assertRaises(DuplicateArticleException):
            ArticleService.create_article(self=ArticleService(),
                                          account=self.publisher,
                                          article=self.article10)

        # try as an admin
        with self.assertRaises(DuplicateArticleException):
            ArticleService.create_article(self=ArticleService(),
                                          account=self.admin,
                                          article=self.article10,
                                          update_article_id=self.article10.id)

    def test_07_new_doi_existing_url(self):
        ba = self.article10.bibjson()
        ba.remove_urls(ba.FULLTEXT)
        ba.add_url(self.article11.bibjson().get_single_url(ba.FULLTEXT), ba.FULLTEXT)
        # check for doi from other article of the same publisher
        ba.remove_identifiers(ba.DOI)
        ba.add_identifier(ba.DOI, "10.0000/article-UPDATED")

        # try as publisher
        with self.assertRaises(DuplicateArticleException):
            ArticleService.create_article(self=ArticleService(),
                                          account=self.publisher,
                                          article=self.article10)

        # try as an admin
        with self.assertRaises(DuplicateArticleException):
            ArticleService.create_article(self=ArticleService(),
                                          account=self.admin,
                                          article=self.article10,
                                          update_article_id=self.article10.id)
def all_articles(self):
    from portality.models import Article
    return Article.find_by_issns(self.known_issns())
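# Usage sketch (assumes `journal` is the owning model instance; the result is
# a list of Article objects sharing any of the journal's known ISSNs):
def example_count_articles(journal):
    return len(journal.all_articles())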
def migrate_articles(source, batch_size=5000):
    # read in the content
    f = open(source)
    xml = etree.parse(f)
    f.close()
    articles = xml.getroot()
    print("migrating", str(len(articles)), "article records from", source)

    counter = 0
    omissions = 0
    batch = []
    for element in articles:
        a = Article()
        b = _to_article_bibjson(element)
        a.set_bibjson(b)

        hasjournal = _add_journal_info(a)
        if not hasjournal:
            print("INFO: omitting article")
            omissions += 1
            continue

        a.set_created(_created_date(element))
        a.set_id()
        a.prep()    # prepare the thing to be saved, which is necessary since we're not actually going to save()
        batch.append(a.data)

        if len(batch) >= batch_size:
            counter += len(batch)
            print("Writing batch, size", len(batch))
            Article.bulk(batch, refresh=True)
            print("batch written, total so far", counter)
            del batch[:]

    if len(batch) > 0:
        counter += len(batch)
        print("Writing final batch, size", len(batch))
        Article.bulk(batch, refresh=True)
        print("batch written, total written", counter)

    print("wrote", counter, "articles, omitted", omissions)
def test_01_batch_create_article(self, name, kwargs):
    articles_arg = kwargs.get("articles")
    duplicate_in_batch_arg = kwargs.get("duplicate_in_batch")
    duplicate_in_index_arg = kwargs.get("duplicate_in_index")
    account_arg = kwargs.get("account")
    duplicate_check_arg = kwargs.get("duplicate_check")
    merge_duplicate_arg = kwargs.get("merge_duplicate")
    limit_to_account_arg = kwargs.get("limit_to_account")
    add_journal_info_arg = kwargs.get("add_journal_info")
    raises_arg = kwargs.get("raises")
    success_arg = kwargs.get("success")
    fail_arg = kwargs.get("fail")
    update_arg = kwargs.get("update")

    ###############################################
    ## set up

    success = int(success_arg)
    fail = int(fail_arg)
    update = int(update_arg)

    duplicate_in_batch = duplicate_in_batch_arg == "yes"
    duplicate_in_index = int(duplicate_in_index_arg)

    raises = EXCEPTIONS.get(raises_arg)

    duplicate_check = None
    if duplicate_check_arg != "none":
        duplicate_check = duplicate_check_arg == "true"

    merge_duplicate = None
    if merge_duplicate_arg != "none":
        merge_duplicate = merge_duplicate_arg == "true"

    limit_to_account = None
    if limit_to_account_arg != "none":
        limit_to_account = limit_to_account_arg == "true"

    add_journal_info = None
    if add_journal_info_arg != "none":
        add_journal_info = add_journal_info_arg == "true"

    account = None
    if account_arg != "none":
        source = AccountFixtureFactory.make_publisher_source()
        account = Account(**source)

    journal_specs = []
    last_doi = None
    last_ft = None
    last_issn = None
    last_id = None

    articles = None
    if articles_arg != "none":
        articles = []
        if articles_arg == "yes":
            # one with a DOI and no fulltext
            source = ArticleFixtureFactory.make_article_source(eissn="0000-0000", pissn="0000-0000",
                                                               doi="10.123/abc/0", fulltext=False)
            del source["bibjson"]["journal"]
            article = Article(**source)
            article.set_id()
            articles.append(article)
            if add_journal_info:
                journal_specs.append({"title": "0", "pissn": "0000-0000", "eissn": "0000-0000"})

            # another with a DOI and no fulltext
            source = ArticleFixtureFactory.make_article_source(eissn="1111-1111", pissn="1111-1111",
                                                               doi="10.123/abc/1", fulltext=False)
            del source["bibjson"]["journal"]
            article = Article(**source)
            article.set_id()
            articles.append(article)
            if add_journal_info:
                journal_specs.append({"title": "1", "pissn": "1111-1111", "eissn": "1111-1111"})

            # one with a fulltext and no DOI
            source = ArticleFixtureFactory.make_article_source(eissn="2222-2222", pissn="2222-2222",
                                                               fulltext="http://example.com/2", doi=False)
            del source["bibjson"]["journal"]
            article = Article(**source)
            article.set_id()
            articles.append(article)
            if add_journal_info:
                journal_specs.append({"title": "2", "pissn": "2222-2222", "eissn": "2222-2222"})

            # another one with a fulltext and no DOI
            source = ArticleFixtureFactory.make_article_source(eissn="3333-3333", pissn="3333-3333",
                                                               fulltext="http://example.com/3", doi=False)
            del source["bibjson"]["journal"]
            article = Article(**source)
            article.set_id()
            articles.append(article)
            if add_journal_info:
                journal_specs.append({"title": "3", "pissn": "3333-3333", "eissn": "3333-3333"})

            last_issn = "3333-3333"
            last_doi = "10.123/abc/1"
            last_ft = "http://example.com/3"
            last_id = articles[-1].id

            if duplicate_in_batch:
                # one with a duplicated DOI
                source = ArticleFixtureFactory.make_article_source(eissn="4444-4444", pissn="4444-4444",
                                                                   doi="10.123/abc/0",
                                                                   fulltext="http://example.com/4")
                del source["bibjson"]["journal"]
                article = Article(**source)
                article.set_id()
                articles.append(article)
                if add_journal_info:
                    journal_specs.append({"title": "4", "pissn": "4444-4444", "eissn": "4444-4444"})

                # one with a duplicated Fulltext
                source = ArticleFixtureFactory.make_article_source(eissn="5555-5555", pissn="5555-5555",
                                                                   doi="10.123/abc/5",
                                                                   fulltext="http://example.com/1")
                del source["bibjson"]["journal"]
                article = Article(**source)
                article.set_id()
                articles.append(article)
                if add_journal_info:
                    journal_specs.append({"title": "5", "pissn": "5555-5555", "eissn": "5555-5555"})

    ilo_mock = None
    if account_arg == "owner":
        ilo_mock = BLLArticleMockFactory.is_legitimate_owner(legit=True)
    elif account_arg == "own_1":
        ilo_mock = BLLArticleMockFactory.is_legitimate_owner(legit_on_issn=[last_issn])
    else:
        ilo_mock = BLLArticleMockFactory.is_legitimate_owner()
    self.svc.is_legitimate_owner = ilo_mock

    gd_mock = None
    if duplicate_in_index == 1:
        gd_mock = BLLArticleMockFactory.get_duplicate(given_article_id=last_id, eissn=last_issn,
                                                      pissn=last_issn, doi=last_doi, fulltext=last_ft)
    elif duplicate_in_index == 2:
        gd_mock = BLLArticleMockFactory.get_duplicate(merge_conflict=True)
    else:
        gd_mock = BLLArticleMockFactory.get_duplicate(return_none=True)
    self.svc.get_duplicate = gd_mock

    ios_mock = BLLArticleMockFactory.issn_ownership_status([], [], [], [])
    self.svc.issn_ownership_status = ios_mock

    if add_journal_info:
        gj_mock = ModelArticleMockFactory.get_journal(journal_specs)
        Article.get_journal = gj_mock

    ###########################################################
    # Execution

    if raises is not None:
        with self.assertRaises(raises):
            try:
                self.svc.batch_create_articles(articles, account, duplicate_check, merge_duplicate,
                                               limit_to_account, add_journal_info)
            except exceptions.IngestException as e:
                if duplicate_in_index != 2:
                    report = e.result
                    assert report["success"] == success
                    assert report["fail"] == fail
                    assert report["update"] == update
                    assert report["new"] == success - update
                raise
    else:
        report = self.svc.batch_create_articles(articles, account, duplicate_check, merge_duplicate,
                                                limit_to_account, add_journal_info)

        # make sure all the articles are saved before running the asserts
        aids = [(a.id, a.last_updated) for a in articles]
        for aid, lu in aids:
            Article.block(aid, lu, sleep=0.05)

        assert report["success"] == success
        assert report["fail"] == fail
        assert report["update"] == update
        assert report["new"] == success - update

        if success > 0:
            all_articles = Article.all()
            if len(all_articles) != success:
                time.sleep(0.5)
                all_articles = Article.all()
            assert len(all_articles) == success
            for article in all_articles:
                if add_journal_info:
                    assert article.bibjson().journal_title is not None
                else:
                    assert article.bibjson().journal_title is None
        else:
            # there's nothing in the article index
            with self.assertRaises(ESMappingMissingError):
                Article.all()
class TestAdminEditMetadata(DoajTestCase):

    def setUp(self):
        super(TestAdminEditMetadata, self).setUp()

        admin_account = Account.make_account(username="admin", name="Admin",
                                             email="*****@*****.**", roles=["admin"])
        admin_account.set_password('password123')
        admin_account.save()

        publisher_account = Account.make_account(username="publisher", name="Publisher",
                                                 email="*****@*****.**", roles=["publisher"])
        publisher_account.set_password('password456')
        publisher_account.save(blocking=True)

        self.j = Journal(**JournalFixtureFactory.make_journal_source(in_doaj=True))
        self.j.save(blocking=True)
        self.a = Article(**ArticleFixtureFactory.make_article_source(in_doaj=True))
        self.a.save(blocking=True)

    def tearDown(self):
        super(TestAdminEditMetadata, self).tearDown()
        del self.a
        del self.j

    def admin_post_article_metadata_form(self, formdata):
        """ Post a form to the article metadata endpoint """
        with self.app_test.test_client() as t_client:
            self.login(t_client, "admin", "password123")
            resp = t_client.post(url_for('admin.article_page', article_id=self.a.id),
                                 data=dict(formdata))
            assert resp.status_code == 200, "expected: 200, received: {}".format(resp.status)

    @staticmethod
    def login(app, username, password):
        return app.post('/account/login',
                        data=dict(username=username, password=password),
                        follow_redirects=True)

    @staticmethod
    def logout(app):
        return app.get('/account/logout', follow_redirects=True)

    def test_01_open_article_page(self):
        """ Ensure only Admin can open the article metadata form """
        with self.app_test.test_client() as t_client:
            self.login(t_client, "admin", "password123")
            resp = t_client.get(url_for('admin.article_page', article_id=self.a.id),
                                follow_redirects=False)
            assert resp.status_code == 200, "expected: 200, received: {}".format(resp.status)

        # user not logged in
        with self._make_and_push_test_context():
            with self.app_test.test_client() as t_client:
                resp = t_client.get(url_for('admin.article_page', article_id=self.a.id),
                                    follow_redirects=False)
                # expect redirection to login page
                assert resp.status_code == 302, "expected: 302, received: {}".format(resp.status)

        # login as publisher
        with self.app_test.test_client() as t_client:
            self.login(t_client, "publisher", "password456")
            resp = t_client.get(url_for('admin.article_page', article_id=self.a.id),
                                follow_redirects=False)
            # expect redirection to login page
            assert resp.status_code == 302, "expected: 302, received: {}".format(resp.status)

    def test_02_update_article_metadata_no_url_fulltext(self):
        """ Update an article with no change to identifying fields: URL and DOI """
        source = ArticleMetadataFactory(article_source=self.a).update_article_no_change_to_url_and_doi()

        # Submit the form
        self.admin_post_article_metadata_form(source)

        # Retrieve the result
        a = Article.pull(self.a.id)
        b = a.bibjson()
        assert b.title == source['title'], 'expected updated title, received: {}'.format(b.title)

    def test_03_update_fulltext_valid(self):
        """ Update an article's fulltext URL """
        source = ArticleMetadataFactory(article_source=self.a).update_article_fulltext(valid=True)

        # Submit the form
        self.admin_post_article_metadata_form(source)

        a = Article.pull(self.a.id)
        bj = a.bibjson()
        # expect updated fulltext url
        assert bj.get_single_url("fulltext") == 'https://www.newarticleurl.co.uk/fulltext', \
            'expected updated url, received: {}'.format(bj.get_single_url("fulltext"))

    def test_04_update_fulltext_invalid(self):
        """ The form should ignore an update that has the same fulltext URL as an existing article """
        source = ArticleMetadataFactory(article_source=self.a).update_article_fulltext(valid=False)

        a1source = ArticleFixtureFactory.make_article_source(in_doaj=True)
        a1source["id"] = 'aaaaaaaaa_article'
        a1source["fulltext"] = "https://www.urltorepeat.com"
        a1 = Article(**a1source)
        a1.save(blocking=True)

        # Submit the form
        self.admin_post_article_metadata_form(source)

        # Retrieve the result - it should be unchanged
        a = Article.pull(self.a.id)
        bj = a.bibjson()
        assert bj.title == "Article Title", 'expect old title, received: {}'.format(bj.title)
        assert bj.get_single_url("fulltext") == 'http://www.example.com/article', \
            'expected old url, received: {}'.format(bj.get_single_url("fulltext"))

    def test_05_update_doi_valid(self):
        """ The form should allow an update with a new valid DOI """
        source = ArticleMetadataFactory(article_source=self.a).update_article_doi(valid=True)

        # Submit the form
        self.admin_post_article_metadata_form(source)

        # Retrieve the result
        a = Article.pull(self.a.id)
        bj = a.bibjson()
        # expect new data
        assert bj.title == "New title", 'expect updated title, received: {}'.format(bj.title)
        assert bj.get_one_identifier("doi") == '10.1111/article-0', \
            'expected new doi, received: {}'.format(bj.get_one_identifier("doi"))

    def test_06_update_doi_invalid(self):
        source = ArticleMetadataFactory(article_source=self.a).update_article_doi(valid=False)

        a1source = ArticleFixtureFactory.make_article_source(in_doaj=True)
        a1source['id'] = 'aaaaaaaaa_article'
        a1source["fulltext"] = "https://www.someurl.com"
        a1source["doi"] = '10.1234/article'
        a1 = Article(**a1source)
        a1.save(blocking=True)

        # Submit the form
        self.admin_post_article_metadata_form(source)

        a = Article.pull(self.a.id)
        bj = a.bibjson()
        # expect old data
        assert bj.title == "Article Title", 'expect old title, received: {}'.format(bj.title)
        assert bj.get_one_identifier("doi") == '10.0000/SOME.IDENTIFIER', \
            'expected old doi, received: {}'.format(bj.get_one_identifier("doi"))
def test_01_create_article(self, value, kwargs):
    article_arg = kwargs.get("article")
    account_arg = kwargs.get("account")
    get_duplicate_result_arg = kwargs.get("get_duplicate_result")
    role_arg = kwargs.get("role")
    merge_duplicate_arg = kwargs.get("merge_duplicate")
    add_journal_info_arg = kwargs.get("add_journal_info")
    dry_run_arg = kwargs.get("dry_run")
    update_article_id_arg = kwargs.get("update_article_id")
    has_ft_doi_changed_arg = kwargs.get("has_ft_doi_changed_arg")
    raises_arg = kwargs.get("raises")
    success_arg = kwargs.get("success")
    original_saved_arg = kwargs.get("original_saved")
    merge_saved_arg = kwargs.get("merge_saved")

    ###############################################
    ## set up

    success = int(success_arg)
    has_ft_doi_changed = has_ft_doi_changed_arg == "yes"

    merge_duplicate = None
    if merge_duplicate_arg != "none":
        merge_duplicate = merge_duplicate_arg == "true"

    add_journal_info = None
    if add_journal_info_arg != "none":
        add_journal_info = add_journal_info_arg == "true"

    dry_run = None
    if dry_run_arg != "none":
        dry_run = dry_run_arg == "true"

    raises = EXCEPTIONS.get(raises_arg)

    eissn = "1234-5678"
    pissn = "9876-5432"
    doi = "10.123/abc/1"
    fulltext = "http://example.com/1"
    another_doi = "10.123/duplicate-1"
    another_eissn = "1111-1111"
    another_pissn = "2222-2222"

    duplicate_id = None
    original_id = None
    update_article_id = None

    if add_journal_info:
        jsource = JournalFixtureFactory.make_journal_source(in_doaj=True)
        j = Journal(**jsource)
        bj = j.bibjson()
        bj.title = "Add Journal Info Title"
        bj.remove_identifiers()
        bj.add_identifier(bj.P_ISSN, pissn)
        bj.add_identifier(bj.E_ISSN, eissn)
        j.save(blocking=True)

    if get_duplicate_result_arg == 'different':
        source = ArticleFixtureFactory.make_article_source(eissn=another_eissn, pissn=another_pissn,
                                                           doi=doi, fulltext=fulltext)
        del source["bibjson"]["journal"]
        duplicate = Article(**source)
        duplicate.save()
        duplicate_id = duplicate.id

    article_id_to_upload = None
    if article_arg == "exists":
        source = ArticleFixtureFactory.make_article_source(eissn=eissn, pissn=pissn,
                                                           doi=doi, fulltext=fulltext)
        del source["bibjson"]["journal"]
        article = Article(**source)
        article.set_id()
        article_id_to_upload = article.id

    if get_duplicate_result_arg == "itself":
        source = ArticleFixtureFactory.make_article_source(eissn=another_eissn, pissn=another_pissn,
                                                           doi=doi, fulltext=fulltext)
        del source["bibjson"]["journal"]
        duplicate = Article(**source)
        duplicate.set_id(article_id_to_upload)
        duplicate.save()
        duplicate_id = duplicate.id

    if update_article_id_arg != "none":
        another_source = ArticleFixtureFactory.make_article_source(eissn=eissn, pissn=pissn,
                                                                   doi=doi, fulltext=fulltext)
        original = Article(**another_source)
        original.save(blocking=True)
        original_id = original.id

        if update_article_id_arg == "doi_ft_not_changed":
            article.bibjson().title = "This needs to be updated"
        elif update_article_id_arg == "doi_ft_changed_duplicate":
            article.bibjson().remove_identifiers("doi")
            article.bibjson().add_identifier("doi", another_doi)
        elif update_article_id_arg == "doi_ft_changed_ok":
            article.bibjson().remove_identifiers("doi")
            article.bibjson().add_identifier("doi", "10.1234/updated")
    else:
        update_article_id = None

    account = None
    if account_arg != "none":
        source = AccountFixtureFactory.make_publisher_source()
        account = Account(**source)

    legit = account_arg == "owner"
    ilo_mock = BLLArticleMockFactory.is_legitimate_owner(legit=legit)
    self.svc.is_legitimate_owner = ilo_mock

    owned = [eissn, pissn] if account_arg == "owner" else []
    shared = []
    unowned = [eissn] if account_arg == "not_owner" else []
    unmatched = [pissn] if account_arg == "not_owner" else []
    ios_mock = BLLArticleMockFactory.issn_ownership_status(owned, shared, unowned, unmatched)
    self.svc.issn_ownership_status = ios_mock

    if role_arg == "admin":
        account.set_role("admin")
        account.save()

    if get_duplicate_result_arg == "none":
        gd_mock = BLLArticleMockFactory.get_duplicate(return_none=True)
    elif get_duplicate_result_arg == "itself":
        gd_mock = BLLArticleMockFactory.get_duplicate(eissn=eissn, pissn=pissn, doi=doi,
                                                      fulltext=fulltext, given_article_id=original_id)
    elif get_duplicate_result_arg == "different":
        gd_mock = BLLArticleMockFactory.get_duplicate(eissn=another_eissn, pissn=another_pissn, doi=doi,
                                                      fulltext=fulltext, given_article_id=duplicate_id)
    else:
        gd_mock = BLLArticleMockFactory.get_duplicate(given_article_id="exception")
    self.svc.get_duplicate = gd_mock

    mock_article = self.svc.get_duplicate(article)

    if role_arg == "admin" or (role_arg == "publisher" and account_arg == "owner"):
        has_permissions_mock = BLLArticleMockFactory.has_permissions(True)
    else:
        has_permissions_mock = BLLArticleMockFactory.has_permissions(False)
    self.svc.has_permissions = has_permissions_mock

    prepare_update_admin_mock = BLLArticleMockFactory._prepare_update_admin(get_duplicate_result_arg,
                                                                            update_article_id_arg)
    self.svc._prepare_update_admin = prepare_update_admin_mock

    prepare_update_publisher_mock = BLLArticleMockFactory._prepare_update_publisher(get_duplicate_result_arg,
                                                                                    has_ft_doi_changed)
    self.svc._prepare_update_publisher = prepare_update_publisher_mock

    ###########################################################
    # Execution

    if raises is not None:
        with self.assertRaises(raises):
            self.svc.create_article(article, account, merge_duplicate=merge_duplicate,
                                    add_journal_info=add_journal_info, dry_run=dry_run,
                                    update_article_id=original_id)
    else:
        report = self.svc.create_article(article, account, merge_duplicate=merge_duplicate,
                                         add_journal_info=add_journal_info, dry_run=dry_run,
                                         update_article_id=original_id)

        assert report["success"] == success

        # check that the article was saved and if it was saved that it was suitably merged
        if original_saved_arg == "yes" and update_article_id is not None:
            if get_duplicate_result_arg == "itself":
                original = Article.pull(update_article_id)
                assert original is not None
                assert report["update"] == 1, "update: {}".format(report["update"])
                assert report["new"] == 0, "new: {}".format(report["new"])
        elif original_saved_arg == "yes":
            if get_duplicate_result_arg == "itself":
                new = Article.pull(article_id_to_upload)
                assert new is not None
                assert report["update"] == 1, "update: {}".format(report["update"])
                assert report["new"] == 0, "new: {}".format(report["new"])
            elif get_duplicate_result_arg == "none":
                new = Article.pull(article_id_to_upload)
                assert new is not None
                assert report["update"] == 0, "update: {}".format(report["update"])
                assert report["new"] == 1, "new: {}".format(report["new"])

        if merge_saved_arg == "yes":
            merged = Article.pull(mock_article.id)
            assert merged is not None
            assert report["update"] == 1
        elif mock_article is not None and mock_article.id != original_id:
            merged = Article.pull(mock_article.id)
            assert merged is None, "merged: {}".format(merged)

        if add_journal_info:
            assert article.bibjson().journal_title == "Add Journal Info Title"

        if update_article_id_arg == "doi_ft_changed_ok":
            original = Article.pull(original_id)
            assert original is not None
        elif update_article_id_arg == "doi_ft_not_changed":
            original = Article.pull(original_id)
            assert original is not None
def test_01_discover_duplicates(self, name, kwargs):
    article_arg = kwargs.get("article")
    owner_arg = kwargs.get("owner")
    article_doi_arg = kwargs.get("article_doi")
    doi_duplicate_arg = kwargs.get("doi_duplicate")
    article_fulltext_arg = kwargs.get("article_fulltext")
    fulltext_duplicate_arg = kwargs.get("fulltext_duplicate")
    articles_by_doi_arg = kwargs.get("articles_by_doi")
    articles_by_fulltext_arg = kwargs.get("articles_by_fulltext")
    raises_arg = kwargs.get("raises")

    raises = EXCEPTIONS.get(raises_arg)

    ###############################################
    ## set up

    owner = None
    if owner_arg != "none":
        owner = Account(**AccountFixtureFactory.make_publisher_source())

    owner_id = None
    if owner is not None:
        owner_id = owner.id

    # create a journal for the owner
    if owner_arg not in ["none"]:
        source = JournalFixtureFactory.make_journal_source(in_doaj=True)
        journal = Journal(**source)
        journal.set_owner(owner.id)
        journal.bibjson().remove_identifiers()
        journal.bibjson().add_identifier("eissn", "1234-5678")
        journal.bibjson().add_identifier("pissn", "9876-5432")
        journal.save(blocking=True)

    # determine what we need to load into the index
    article_ids = []
    aids_block = []
    if owner_arg not in ["none", "no_articles"]:
        for i, ident in enumerate(IDENTS):
            the_doi = ident["doi"]
            if doi_duplicate_arg == "padded":
                the_doi = " " + the_doi + " "
            elif doi_duplicate_arg == "prefixed":
                the_doi = "https://dx.doi.org/" + the_doi

            the_fulltext = ident["fulltext"]
            if article_fulltext_arg != "invalid":
                if fulltext_duplicate_arg == "padded":
                    the_fulltext = " http:" + the_fulltext
                elif fulltext_duplicate_arg == "http":
                    the_fulltext = "http:" + the_fulltext
                elif fulltext_duplicate_arg == "https":
                    the_fulltext = "https:" + the_fulltext
                else:
                    the_fulltext = "http:" + the_fulltext

            source = ArticleFixtureFactory.make_article_source(eissn="1234-5678", pissn="9876-5432",
                                                               doi=the_doi, fulltext=the_fulltext)
            article = Article(**source)
            article.set_id()
            article.save()
            article_ids.append(article.id)
            aids_block.append((article.id, article.last_updated))

    # generate our incoming article
    article = None
    doi = None
    fulltext = None
    if article_arg == "yes":
        eissn = "1234-5678"  # one matching
        pissn = "6789-1234"  # the other not - issn matches are not relevant to this test

        if article_doi_arg in ["yes", "padded"]:
            doi = "10.1234/abc/11"
            if doi_duplicate_arg in ["yes", "padded"]:
                doi = IDENTS[0]["doi"]
            if article_doi_arg == "padded":
                doi = " doi:" + doi + " "
        elif article_doi_arg in ["invalid"]:
            doi = IDENTS[-1]["doi"]

        if article_fulltext_arg in ["yes", "padded", "https"]:
            fulltext = "//example.com/11"
            if fulltext_duplicate_arg in ["yes", "padded", "https"]:
                fulltext = IDENTS[0]["fulltext"]
            if fulltext_duplicate_arg == "padded":
                fulltext = " http:" + fulltext + " "
            elif fulltext_duplicate_arg == "https":
                fulltext = "https:" + fulltext
            else:
                fulltext = "http:" + fulltext
        elif article_fulltext_arg == "invalid":
            fulltext = IDENTS[-1]["fulltext"]

        source = ArticleFixtureFactory.make_article_source(eissn=eissn, pissn=pissn,
                                                           doi=doi, fulltext=fulltext)
        article = Article(**source)

        # we need to do this if doi or fulltext are none, because the factory will set a default if we don't
        # provide them
        if doi is None:
            article.bibjson().remove_identifiers("doi")
        if fulltext is None:
            article.bibjson().remove_urls("fulltext")

        article.set_id()

    Article.blockall(aids_block)

    ###########################################################
    # Execution

    svc = DOAJ.articleService()
    if raises is not None:
        with self.assertRaises(raises):
            svc.discover_duplicates(article, owner_id)
    else:
        possible_articles = svc.discover_duplicates(article, owner_id)

        if articles_by_doi_arg == "yes":
            assert "doi" in possible_articles
            assert len(possible_articles["doi"]) == 1
            # if this is the "invalid" doi, then we expect it to match the final article, otherwise match the first
            if article_doi_arg == "invalid":
                assert possible_articles["doi"][0].id == article_ids[-1]
            else:
                assert possible_articles["doi"][0].id == article_ids[0]
        else:
            if possible_articles is not None:
                assert "doi" not in possible_articles

        if articles_by_fulltext_arg == "yes":
            assert "fulltext" in possible_articles
            assert len(possible_articles["fulltext"]) == 1
            # if this is the "invalid" fulltext url, then we expect it to match the final article, otherwise match the first
            if article_fulltext_arg == "invalid":
                assert possible_articles["fulltext"][0].id == article_ids[-1]
            else:
                assert possible_articles["fulltext"][0].id == article_ids[0]
        else:
            if possible_articles is not None:
                assert "fulltext" not in possible_articles
def test_01_create_article(self, name, kwargs):
    article_arg = kwargs.get("article")
    article_duplicate_arg = kwargs.get("article_duplicate")
    account_arg = kwargs.get("account")
    duplicate_check_arg = kwargs.get("duplicate_check")
    merge_duplicate_arg = kwargs.get("merge_duplicate")
    limit_to_account_arg = kwargs.get("limit_to_account")
    dry_run_arg = kwargs.get("dry_run")

    raises_arg = kwargs.get("raises")
    success_arg = kwargs.get("success")
    original_saved_arg = kwargs.get("original_saved")
    merge_saved_arg = kwargs.get("merge_saved")

    ###############################################
    ## set up

    success = int(success_arg)

    duplicate_check = None
    if duplicate_check_arg != "none":
        duplicate_check = True if duplicate_check_arg == "true" else False

    merge_duplicate = None
    if merge_duplicate_arg != "none":
        merge_duplicate = True if merge_duplicate_arg == "true" else False

    limit_to_account = None
    if limit_to_account_arg != "none":
        limit_to_account = True if limit_to_account_arg == "true" else False

    dry_run = None
    if dry_run_arg != "none":
        dry_run = True if dry_run_arg == "true" else False

    raises = EXCEPTIONS.get(raises_arg)

    article = None
    original_id = None
    if article_arg == "exists":
        source = ArticleFixtureFactory.make_article_source(eissn="1234-5678", pissn="9876-5432", doi="10.123/abc/1", fulltext="http://example.com/1")
        article = Article(**source)
        article.set_id()
        original_id = article.id

    account = None
    if account_arg != "none":
        source = AccountFixtureFactory.make_publisher_source()
        account = Account(**source)

    legit = True if account_arg == "owner" else False
    ilo_mock = BLLArticleMockFactory.is_legitimate_owner(legit=legit)
    self.svc.is_legitimate_owner = ilo_mock

    owned = ["1234-5678", "9876-5432"] if account_arg == "owner" else []
    shared = []
    unowned = ["1234-5678"] if account_arg == "not_owner" else []
    unmatched = ["9876-5432"] if account_arg == "not_owner" else []
    ios_mock = BLLArticleMockFactory.issn_ownership_status(owned, shared, unowned, unmatched)
    self.svc.issn_ownership_status = ios_mock

    gd_mock = None
    if article_duplicate_arg == "yes":
        gd_mock = BLLArticleMockFactory.get_duplicate(eissn="1234-5678", pissn="9876-5432", doi="10.123/abc/1", fulltext="http://example.com/1")
    else:
        gd_mock = BLLArticleMockFactory.get_duplicate(return_none=True)
    self.svc.get_duplicate = gd_mock

    mock_article = self.svc.get_duplicate(article)

    ###########################################################
    # Execution

    if raises is not None:
        with self.assertRaises(raises):
            self.svc.create_article(article, account, duplicate_check, merge_duplicate, limit_to_account, dry_run)
    else:
        report = self.svc.create_article(article, account, duplicate_check, merge_duplicate, limit_to_account, dry_run)
        assert report["success"] == success

        # check that the article was saved and, if it was saved, that it was suitably merged
        if original_saved_arg == "yes":
            original = Article.pull(original_id)
            assert original is not None
            assert report["update"] == 0
        elif article is not None:
            original = Article.pull(original_id)
            assert original is None

        if merge_saved_arg == "yes":
            merged = Article.pull(mock_article.id)
            assert merged is not None
            assert report["update"] == 1
        elif mock_article is not None:
            merged = Article.pull(mock_article.id)
            assert merged is None
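# The four boolean-ish arguments above are all decoded with the same three-way
# pattern: the string "none" means the argument is omitted, otherwise
# "true"/"false" map to booleans. If that pattern needed reuse, it could be
# factored into a helper along these lines; tri_state is a hypothetical name,
# not something the suite defines.
def tri_state(arg):
    # Map a test-matrix string to None (argument absent), True or False.
    if arg == "none":
        return None
    return arg == "true"

# e.g. duplicate_check = tri_state(duplicate_check_arg)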
def test_01_batch_create_article(self, name, kwargs):
    articles_arg = kwargs.get("articles")
    duplicate_in_batch_arg = kwargs.get("duplicate_in_batch")
    duplicate_in_index_arg = kwargs.get("duplicate_in_index")
    account_arg = kwargs.get("account")
    duplicate_check_arg = kwargs.get("duplicate_check")
    merge_duplicate_arg = kwargs.get("merge_duplicate")
    limit_to_account_arg = kwargs.get("limit_to_account")
    add_journal_info_arg = kwargs.get("add_journal_info")

    raises_arg = kwargs.get("raises")
    success_arg = kwargs.get("success")
    fail_arg = kwargs.get("fail")
    update_arg = kwargs.get("update")

    ###############################################
    ## set up

    success = int(success_arg)
    fail = int(fail_arg)
    update = int(update_arg)

    duplicate_in_batch = duplicate_in_batch_arg == "yes"
    duplicate_in_index = int(duplicate_in_index_arg)

    raises = EXCEPTIONS.get(raises_arg)

    duplicate_check = None
    if duplicate_check_arg != "none":
        duplicate_check = True if duplicate_check_arg == "true" else False

    merge_duplicate = None
    if merge_duplicate_arg != "none":
        merge_duplicate = True if merge_duplicate_arg == "true" else False

    limit_to_account = None
    if limit_to_account_arg != "none":
        limit_to_account = True if limit_to_account_arg == "true" else False

    add_journal_info = None
    if add_journal_info_arg != "none":
        add_journal_info = True if add_journal_info_arg == "true" else False

    account = None
    if account_arg != "none":
        source = AccountFixtureFactory.make_publisher_source()
        account = Account(**source)

    journal_specs = []
    last_doi = None
    last_ft = None
    last_issn = None
    last_id = None

    articles = None
    if articles_arg != "none":
        articles = []
        if articles_arg == "yes":
            # one with a DOI and no fulltext
            source = ArticleFixtureFactory.make_article_source(eissn="0000-0000", pissn="0000-0000", doi="10.123/abc/0", fulltext=False)
            del source["bibjson"]["journal"]
            article = Article(**source)
            article.set_id()
            articles.append(article)
            if add_journal_info:
                journal_specs.append({"title": "0", "pissn": "0000-0000", "eissn": "0000-0000"})

            # another with a DOI and no fulltext
            source = ArticleFixtureFactory.make_article_source(eissn="1111-1111", pissn="1111-1111", doi="10.123/abc/1", fulltext=False)
            del source["bibjson"]["journal"]
            article = Article(**source)
            article.set_id()
            articles.append(article)
            if add_journal_info:
                journal_specs.append({"title": "1", "pissn": "1111-1111", "eissn": "1111-1111"})

            # one with a fulltext and no DOI
            source = ArticleFixtureFactory.make_article_source(eissn="2222-2222", pissn="2222-2222", fulltext="http://example.com/2", doi=False)
            del source["bibjson"]["journal"]
            article = Article(**source)
            article.set_id()
            articles.append(article)
            if add_journal_info:
                journal_specs.append({"title": "2", "pissn": "2222-2222", "eissn": "2222-2222"})

            # another one with a fulltext and no DOI
            source = ArticleFixtureFactory.make_article_source(eissn="3333-3333", pissn="3333-3333", fulltext="http://example.com/3", doi=False)
            del source["bibjson"]["journal"]
            article = Article(**source)
            article.set_id()
            articles.append(article)
            if add_journal_info:
                journal_specs.append({"title": "3", "pissn": "3333-3333", "eissn": "3333-3333"})

            last_issn = "3333-3333"
            last_doi = "10.123/abc/1"
            last_ft = "http://example.com/3"
            last_id = articles[-1].id

            if duplicate_in_batch:
                # one with a duplicated DOI
                source = ArticleFixtureFactory.make_article_source(eissn="4444-4444", pissn="4444-4444", doi="10.123/abc/0", fulltext="http://example.com/4")
                del source["bibjson"]["journal"]
                article = Article(**source)
                article.set_id()
                articles.append(article)
                if add_journal_info:
                    journal_specs.append({"title": "4", "pissn": "4444-4444", "eissn": "4444-4444"})

                # one with a duplicated fulltext
                source = ArticleFixtureFactory.make_article_source(eissn="5555-5555", pissn="5555-5555", doi="10.123/abc/5", fulltext="http://example.com/1")
                del source["bibjson"]["journal"]
                article = Article(**source)
                article.set_id()
                articles.append(article)
                if add_journal_info:
                    journal_specs.append({"title": "5", "pissn": "5555-5555", "eissn": "5555-5555"})

    ilo_mock = None
    if account_arg == "owner":
        ilo_mock = BLLArticleMockFactory.is_legitimate_owner(legit=True)
    elif account_arg == "own_1":
        ilo_mock = BLLArticleMockFactory.is_legitimate_owner(legit_on_issn=[last_issn])
    else:
        ilo_mock = BLLArticleMockFactory.is_legitimate_owner()
    self.svc.is_legitimate_owner = ilo_mock

    gd_mock = None
    if duplicate_in_index == 1:
        gd_mock = BLLArticleMockFactory.get_duplicate(given_article_id=last_id, eissn=last_issn, pissn=last_issn, doi=last_doi, fulltext=last_ft)
    elif duplicate_in_index == 2:
        gd_mock = BLLArticleMockFactory.get_duplicate(merge_duplicate=True)
    else:
        gd_mock = BLLArticleMockFactory.get_duplicate(return_none=True)
    self.svc.get_duplicate = gd_mock

    ios_mock = BLLArticleMockFactory.issn_ownership_status([], [], [], [])
    self.svc.issn_ownership_status = ios_mock

    self.svc._doi_or_fulltext_updated = BLLArticleMockFactory.doi_or_fulltext_updated(False, False)

    if add_journal_info:
        gj_mock = ModelArticleMockFactory.get_journal(journal_specs)
        Article.get_journal = gj_mock

    ###########################################################
    # Execution

    if raises is not None:
        with self.assertRaises(raises):
            try:
                self.svc.batch_create_articles(articles, account, duplicate_check, merge_duplicate, limit_to_account, add_journal_info)
            except exceptions.IngestException as e:
                if duplicate_in_index != 2:
                    report = e.result
                    assert report["success"] == success
                    assert report["fail"] == fail
                    assert report["update"] == update
                    assert report["new"] == success - update
                raise
    else:
        report = self.svc.batch_create_articles(articles, account, duplicate_check, merge_duplicate, limit_to_account, add_journal_info)

        # make sure all the articles are saved before running the asserts
        aids = [(a.id, a.last_updated) for a in articles]
        for aid, lu in aids:
            Article.block(aid, lu, sleep=0.05)

        assert report["success"] == success
        assert report["fail"] == fail
        assert report["update"] == update
        assert report["new"] == success - update

        if success > 0:
            all_articles = Article.all()
            if len(all_articles) != success:
                time.sleep(0.5)
                all_articles = Article.all()
            assert len(all_articles) == success
            for article in all_articles:
                if add_journal_info:
                    assert article.bibjson().journal_title is not None
                else:
                    assert article.bibjson().journal_title is None
        else:
            # there's nothing in the article index
            with self.assertRaises(ESMappingMissingError):
                Article.all()
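# The loop of Article.block(aid, lu, sleep=0.05) calls above exists because
# Elasticsearch indexing is near-real-time: assertions that query the index
# can otherwise run before the saved documents are visible. A sketch of that
# wait factored out; block_all_articles is a hypothetical name, relying only
# on the Article.block call already used above.
def block_all_articles(articles, sleep=0.05):
    # Wait until each saved article is visible in the index, so that
    # subsequent queries and asserts see a consistent view.
    for a in articles:
        Article.block(a.id, a.last_updated, sleep=sleep)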
def test_01_batch_create_article(self, name, kwargs):
    articles_arg = kwargs.get("articles")
    duplicate_in_batch_arg = kwargs.get("duplicate_in_batch")
    duplicate_in_index_arg = kwargs.get("duplicate_in_index")
    account_arg = kwargs.get("account")
    duplicate_check_arg = kwargs.get("duplicate_check")
    merge_duplicate_arg = kwargs.get("merge_duplicate")
    limit_to_account_arg = kwargs.get("limit_to_account")

    raises_arg = kwargs.get("raises")
    success_arg = kwargs.get("success")
    fail_arg = kwargs.get("fail")
    update_arg = kwargs.get("update")

    ###############################################
    ## set up

    success = int(success_arg)
    fail = int(fail_arg)
    update = int(update_arg)

    duplicate_in_batch = duplicate_in_batch_arg == "yes"
    duplicate_in_index = duplicate_in_index_arg == "yes"

    raises = EXCEPTIONS.get(raises_arg)

    duplicate_check = None
    if duplicate_check_arg != "none":
        duplicate_check = True if duplicate_check_arg == "true" else False

    merge_duplicate = None
    if merge_duplicate_arg != "none":
        merge_duplicate = True if merge_duplicate_arg == "true" else False

    limit_to_account = None
    if limit_to_account_arg != "none":
        limit_to_account = True if limit_to_account_arg == "true" else False

    account = None
    if account_arg != "none":
        source = AccountFixtureFactory.make_publisher_source()
        account = Account(**source)

    last_doi = None
    last_ft = None
    last_issn = None
    last_id = None

    articles = None
    if articles_arg != "none":
        articles = []
        if articles_arg == "yes":
            # one with a DOI and no fulltext
            source = ArticleFixtureFactory.make_article_source(eissn="0000-0000", pissn="0000-0000", doi="10.123/abc/0", fulltext=False)
            article = Article(**source)
            article.set_id()
            articles.append(article)

            # another with a DOI and no fulltext
            source = ArticleFixtureFactory.make_article_source(eissn="1111-1111", pissn="1111-1111", doi="10.123/abc/1", fulltext=False)
            article = Article(**source)
            article.set_id()
            articles.append(article)

            # one with a fulltext and no DOI
            source = ArticleFixtureFactory.make_article_source(eissn="2222-2222", pissn="2222-2222", fulltext="http://example.com/2", doi=False)
            article = Article(**source)
            article.set_id()
            articles.append(article)

            # another one with a fulltext and no DOI
            source = ArticleFixtureFactory.make_article_source(eissn="3333-3333", pissn="3333-3333", fulltext="http://example.com/3", doi=False)
            article = Article(**source)
            article.set_id()
            articles.append(article)

            last_issn = "3333-3333"
            last_doi = "10.123/abc/1"
            last_ft = "http://example.com/3"
            last_id = articles[-1].id

            if duplicate_in_batch:
                # one with a duplicated DOI
                source = ArticleFixtureFactory.make_article_source(eissn="4444-4444", pissn="4444-4444", doi="10.123/abc/0", fulltext="http://example.com/4")
                article = Article(**source)
                article.set_id()
                articles.append(article)

                # one with a duplicated fulltext
                source = ArticleFixtureFactory.make_article_source(eissn="5555-5555", pissn="5555-5555", doi="10.123/abc/5", fulltext="http://example.com/1")
                article = Article(**source)
                article.set_id()
                articles.append(article)

        """
        article_count = int(articles_arg)
        for i in range(article_count):
            idx = str(i)
            if duplicate_in_batch:
                if i < 2:
                    idx = "duplicate"
            last_issn = str(i) * 4 + "-" + str(i) * 4
            last_doi = "10.123/abc/" + idx
            last_ft = "http://example.com/" + idx
            source = ArticleFixtureFactory.make_article_source(eissn=last_issn, pissn=last_issn, doi=last_doi, fulltext=last_ft)
            article = Article(**source)
            article.set_id()
            last_id = article.id
            articles.append(article)
        """

    ilo_mock = None
    if account_arg == "owner":
        ilo_mock = BLLArticleMockFactory.is_legitimate_owner(legit=True)
    elif account_arg == "own_1":
        ilo_mock = BLLArticleMockFactory.is_legitimate_owner(legit_on_issn=[last_issn])
    else:
        ilo_mock = BLLArticleMockFactory.is_legitimate_owner()
    self.svc.is_legitimate_owner = ilo_mock

    gd_mock = None
    if duplicate_in_index:
        gd_mock = BLLArticleMockFactory.get_duplicate(given_article_id=last_id, eissn=last_issn, pissn=last_issn, doi=last_doi, fulltext=last_ft)
    else:
        gd_mock = BLLArticleMockFactory.get_duplicate(return_none=True)
    self.svc.get_duplicate = gd_mock

    ios_mock = BLLArticleMockFactory.issn_ownership_status([], [], [], [])
    self.svc.issn_ownership_status = ios_mock

    ###########################################################
    # Execution

    if raises is not None:
        with self.assertRaises(raises):
            try:
                self.svc.batch_create_articles(articles, account, duplicate_check, merge_duplicate, limit_to_account)
            except exceptions.IngestException as e:
                report = e.result
                assert report["success"] == success
                assert report["fail"] == fail
                assert report["update"] == update
                assert report["new"] == success - update
                raise
    else:
        report = self.svc.batch_create_articles(articles, account, duplicate_check, merge_duplicate, limit_to_account)

        # make sure all the articles are saved before running the asserts
        aids = [(a.id, a.last_updated) for a in articles]
        for aid, lu in aids:
            Article.block(aid, lu, sleep=0.05)

        assert report["success"] == success
        assert report["fail"] == fail
        assert report["update"] == update
        assert report["new"] == success - update

        if success > 0:
            all_articles = Article.all()
            if len(all_articles) != success:
                time.sleep(0.5)
                all_articles = Article.all()
            assert len(all_articles) == success
        else:
            # there's nothing in the article index
            with self.assertRaises(ESMappingMissingError):
                Article.all()
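# Both batch tests also guard against the same eventual-consistency wrinkle
# with a single retry around Article.all(). Generalised, that pattern might
# look like the following sketch; poll_all_articles and its parameters are
# illustrative assumptions, not suite API.
import time

def poll_all_articles(expected, retries=3, delay=0.5):
    # Poll the index until it reports the expected number of articles,
    # or until we run out of retries; return whatever we last saw.
    results = Article.all()
    for _ in range(retries):
        if len(results) == expected:
            break
        time.sleep(delay)
        results = Article.all()
    return results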
def test_01_create_article(self, name, kwargs):
    article_arg = kwargs.get("article")
    article_duplicate_arg = kwargs.get("article_duplicate")
    account_arg = kwargs.get("account")
    duplicate_check_arg = kwargs.get("duplicate_check")
    merge_duplicate_arg = kwargs.get("merge_duplicate")
    limit_to_account_arg = kwargs.get("limit_to_account")
    add_journal_info_arg = kwargs.get("add_journal_info")
    dry_run_arg = kwargs.get("dry_run")

    raises_arg = kwargs.get("raises")
    success_arg = kwargs.get("success")
    original_saved_arg = kwargs.get("original_saved")
    merge_saved_arg = kwargs.get("merge_saved")

    ###############################################
    ## set up

    success = int(success_arg)

    duplicate_check = None
    if duplicate_check_arg != "none":
        duplicate_check = True if duplicate_check_arg == "true" else False

    merge_duplicate = None
    if merge_duplicate_arg != "none":
        merge_duplicate = True if merge_duplicate_arg == "true" else False

    limit_to_account = None
    if limit_to_account_arg != "none":
        limit_to_account = True if limit_to_account_arg == "true" else False

    add_journal_info = None
    if add_journal_info_arg != "none":
        add_journal_info = True if add_journal_info_arg == "true" else False

    dry_run = None
    if dry_run_arg != "none":
        dry_run = True if dry_run_arg == "true" else False

    raises = EXCEPTIONS.get(raises_arg)

    eissn = "1234-5678"
    pissn = "9876-5432"

    if add_journal_info:
        jsource = JournalFixtureFactory.make_journal_source(in_doaj=True)
        j = Journal(**jsource)
        bj = j.bibjson()
        bj.title = "Add Journal Info Title"
        bj.remove_identifiers()
        bj.add_identifier(bj.P_ISSN, pissn)
        bj.add_identifier(bj.E_ISSN, eissn)
        j.save(blocking=True)

    article = None
    original_id = None
    if article_arg == "exists":
        source = ArticleFixtureFactory.make_article_source(eissn=eissn, pissn=pissn, doi="10.123/abc/1", fulltext="http://example.com/1")
        del source["bibjson"]["journal"]
        article = Article(**source)
        article.set_id()
        original_id = article.id

    account = None
    if account_arg != "none":
        source = AccountFixtureFactory.make_publisher_source()
        account = Account(**source)

    legit = True if account_arg == "owner" else False
    ilo_mock = BLLArticleMockFactory.is_legitimate_owner(legit=legit)
    self.svc.is_legitimate_owner = ilo_mock

    owned = [eissn, pissn] if account_arg == "owner" else []
    shared = []
    unowned = [eissn] if account_arg == "not_owner" else []
    unmatched = [pissn] if account_arg == "not_owner" else []
    ios_mock = BLLArticleMockFactory.issn_ownership_status(owned, shared, unowned, unmatched)
    self.svc.issn_ownership_status = ios_mock

    gd_mock = None
    if article_duplicate_arg == "yes":
        gd_mock = BLLArticleMockFactory.get_duplicate(eissn=eissn, pissn=pissn, doi="10.123/abc/1", fulltext="http://example.com/1")
    else:
        gd_mock = BLLArticleMockFactory.get_duplicate(return_none=True)
    self.svc.get_duplicate = gd_mock

    mock_article = self.svc.get_duplicate(article)

    ###########################################################
    # Execution

    if raises is not None:
        with self.assertRaises(raises):
            self.svc.create_article(article, account, duplicate_check, merge_duplicate, limit_to_account, add_journal_info, dry_run)
    else:
        report = self.svc.create_article(article, account, duplicate_check, merge_duplicate, limit_to_account, add_journal_info, dry_run)
        assert report["success"] == success

        # check that the article was saved and, if it was saved, that it was suitably merged
        if original_saved_arg == "yes":
            original = Article.pull(original_id)
            assert original is not None
            assert report["update"] == 0
        elif article is not None:
            original = Article.pull(original_id)
            assert original is None

        if merge_saved_arg == "yes":
            merged = Article.pull(mock_article.id)
            assert merged is not None
            assert report["update"] == 1
        elif mock_article is not None:
            merged = Article.pull(mock_article.id)
            assert merged is None

        if add_journal_info:
            assert article.bibjson().journal_title == "Add Journal Info Title"
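# Throughout these tests, collaborator methods on self.svc are replaced
# wholesale with callables built by BLLArticleMockFactory. The factory's
# internals are not shown here, but the usage implies a closure-returning
# shape roughly like this sketch; make_get_duplicate_mock is a hypothetical
# stand-in, not the factory's real code.
def make_get_duplicate_mock(return_article=None):
    # Return a callable with get_duplicate's apparent signature that
    # ignores its inputs and yields a canned result.
    def mock(article, owner=None):
        return return_article
    return mock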
def test_01_issn_ownership_status(self, name, kwargs):
    article_arg = kwargs.get("article")
    owner_arg = kwargs.get("owner")
    article_eissn_arg = kwargs.get("article_eissn")
    article_pissn_arg = kwargs.get("article_pissn")
    seen_eissn_arg = kwargs.get("seen_eissn")
    seen_pissn_arg = kwargs.get("seen_pissn")
    journal_owner_arg = kwargs.get("journal_owner")
    raises_arg = kwargs.get("raises")

    raises = EXCEPTIONS.get(raises_arg)

    ###############################################
    ## set up

    owner = None
    if owner_arg != "none":
        owner = Account(**AccountFixtureFactory.make_publisher_source())

    owner_id = None
    if owner is not None:
        owner_id = owner.id

    # generate our incoming article
    article = None
    eissn = None
    pissn = None
    if article_arg == "exists":
        source = ArticleFixtureFactory.make_article_source()
        article = Article(**source)
        article.set_id()

        article.bibjson().remove_identifiers("pissn")
        if article_pissn_arg == "yes":
            pissn = "1234-5678"
            article.bibjson().add_identifier("pissn", pissn)

        article.bibjson().remove_identifiers("eissn")
        if article_eissn_arg == "yes":
            eissn = "9876-5432"
            article.bibjson().add_identifier("eissn", eissn)

    issns = []
    if eissn is not None and pissn is not None and seen_eissn_arg == "yes" and seen_pissn_arg == "yes":
        issns.append((eissn, pissn))
    if eissn is not None and seen_eissn_arg == "yes":
        issns.append((eissn, "4321-9876"))
        issns.append((eissn, None))
    if pissn is not None and seen_pissn_arg == "yes":
        issns.append(("6789-4321", pissn))
        issns.append((None, pissn))

    owners = []
    if journal_owner_arg == "none":
        owners = [None]
    elif journal_owner_arg == "correct" and owner_id is not None:
        owners = [owner_id]
    elif journal_owner_arg == "incorrect":
        owners = ["randomowner"]
    elif journal_owner_arg == "mix" and owner_id is not None:
        owners.append(owner_id)
        owners.append("randomowner")
        owners.append(None)

    mock = ModelJournalMockFactory.find_by_issn(issns, owners)
    Journal.find_by_issn = mock

    ###########################################################
    # Execution

    svc = DOAJ.articleService()
    if raises is not None:
        with self.assertRaises(raises):
            svc.issn_ownership_status(article, owner_id)
    else:
        owned, shared, unowned, unmatched = svc.issn_ownership_status(article, owner_id)

        owned_count = 0
        if seen_eissn_arg == "yes" and eissn is not None and journal_owner_arg in ["correct"]:
            assert eissn in owned
            owned_count += 1
        elif eissn is not None:
            assert eissn not in owned

        if seen_pissn_arg == "yes" and pissn is not None and journal_owner_arg in ["correct"]:
            assert pissn in owned
            owned_count += 1
        elif pissn is not None:
            assert pissn not in owned

        assert len(owned) == owned_count

        shared_count = 0
        if seen_eissn_arg == "yes" and eissn is not None and journal_owner_arg in ["mix"]:
            assert eissn in shared
            shared_count += 1
        elif eissn is not None:
            assert eissn not in shared

        if seen_pissn_arg == "yes" and pissn is not None and journal_owner_arg in ["mix"]:
            assert pissn in shared
            shared_count += 1
        elif pissn is not None:
            assert pissn not in shared

        assert len(shared) == shared_count

        unowned_count = 0
        if seen_eissn_arg == "yes" and eissn is not None and journal_owner_arg in ["incorrect", "none"]:
            assert eissn in unowned
            unowned_count += 1
        elif eissn is not None:
            assert eissn not in unowned

        if seen_pissn_arg == "yes" and pissn is not None and journal_owner_arg in ["incorrect", "none"]:
            assert pissn in unowned
            unowned_count += 1
        elif pissn is not None:
            assert pissn not in unowned

        assert len(unowned) == unowned_count

        unmatched_count = 0
        if seen_eissn_arg == "no" and eissn is not None:
            assert eissn in unmatched
            unmatched_count += 1
        elif eissn is not None:
            assert eissn not in unmatched

        if seen_pissn_arg == "no" and pissn is not None:
            assert pissn in unmatched
            unmatched_count += 1
        elif pissn is not None:
            assert pissn not in unmatched

        assert len(unmatched) == unmatched_count
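# The four result buckets asserted on above follow a consistent rule: an ISSN
# no journal knows about is unmatched; one whose journals all belong to the
# querying account is owned; mixed ownership is shared; anything else is
# unowned. A toy restatement of that rule under those assumed semantics, not
# the service's real implementation:
def classify_issn(issn, seen_issns, owners, owner_id):
    # Toy classifier mirroring the buckets the assertions above expect.
    if issn not in seen_issns:
        return "unmatched"
    if all(o == owner_id for o in owners):
        return "owned"
    if owner_id in owners:
        return "shared"
    return "unowned"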