def test_01_duplicates_report(self):
    """Check duplication reporting across all articles in the index"""
    # Build a duplicate pair: two articles with identical identifiers.
    fixture_kwargs = dict(eissn='1111-1111', pissn='2222-2222', with_id=False,
                          in_doaj=True, with_journal_info=True)

    article1 = models.Article(**ArticleFixtureFactory.make_article_source(**fixture_kwargs))
    a1_doi = article1.bibjson().get_identifiers('doi')
    assert a1_doi is not None
    article1.save(blocking=True)
    time.sleep(1)

    article2 = models.Article(**ArticleFixtureFactory.make_article_source(**fixture_kwargs))
    a2_doi = article2.bibjson().get_identifiers('doi')
    assert a2_doi == a1_doi
    article2.save(blocking=True)

    # Run the duplicate-report background task as the system user.
    user = app.config.get("SYSTEM_USERNAME")
    job = article_duplicate_report.ArticleDuplicateReportBackgroundTask.prepare(user, outdir=TMP_DIR)
    task = article_duplicate_report.ArticleDuplicateReportBackgroundTask(job)
    task.run()

    # The audit log should show we saved the reports to the TMP_DIR defined above.
    audit_1 = job.audit.pop(0)
    assert audit_1.get('message', '').endswith(TMP_DIR)
    report_path = TMP_DIR + '/duplicate_articles_global_' + dates.today() + '.csv'
    assert os.path.exists(report_path)

    # The interim per-article CSV should have been cleaned up.
    assert not os.path.exists(paths.rel2abs(__file__, 'tmp_article_duplicate_report'))

    # The duplicates should appear in the report and in the audit summary count.
    with open(report_path) as f:
        csvlines = f.readlines()

    # Expect one result line after the headings: the newest article has 1 duplicate.
    res = csvlines.pop()
    assert res.startswith(article2.id)  # Newest first, so article1 is article2's duplicate.
    assert article1.id in res
    assert 'doi+fulltext' in res

    audit_2 = job.audit.pop(0)
    assert audit_2.get('message', '') == '2 articles processed for duplicates. 1 global duplicate sets found.'
def test_09_search(self):
    """End-to-end query check: pre-filters hide non-DOAJ articles, post-filters strip private admin data."""
    # Make 4 articles: 3 in DOAJ, 1 not in DOAJ. Only the 3 public ones should
    # come back, and the publisher_record_id must be scrubbed from results.
    qsvc = QueryService()

    articles = []
    for _ in range(0, 3):
        public_article = models.Article(**ArticleFixtureFactory.make_article_source(with_id=False))
        articles.append(public_article)
        assert public_article.publisher_record_id() == 'some_identifier'
        public_article.save(blocking=True)

    private_article = models.Article(**ArticleFixtureFactory.make_article_source(with_id=False, in_doaj=False))
    articles.append(private_article)
    private_article.save(blocking=True)

    res = qsvc.search('query', 'article', {"query": {"match_all": {}}},
                      account=None, additional_parameters={})
    assert res['hits']['total'] == 3, res['hits']['total']

    for hit in res['hits']['hits']:
        am = models.Article(**hit)
        # Post-filter must have removed the non-public publisher record id.
        assert am.publisher_record_id() is None, am.publisher_record_id()
def test_01_duplicates_report(self):
    """Check duplication reporting across all articles in the index"""
    # Two articles built from identical fixture parameters form a duplicate pair.
    article1 = models.Article(**ArticleFixtureFactory.make_article_source(
        eissn='1111-1111', pissn='2222-2222', with_id=False, in_doaj=True, with_journal_info=True))
    a1_doi = article1.bibjson().get_identifiers('doi')
    assert a1_doi is not None
    article1.save(blocking=True)
    time.sleep(1)

    article2 = models.Article(**ArticleFixtureFactory.make_article_source(
        eissn='1111-1111', pissn='2222-2222', with_id=False, in_doaj=True, with_journal_info=True))
    a2_doi = article2.bibjson().get_identifiers('doi')
    assert a2_doi == a1_doi
    article2.save(blocking=True)

    # Kick off the reporting background task as the system user.
    user = app.config.get("SYSTEM_USERNAME")
    job = article_duplicate_report.ArticleDuplicateReportBackgroundTask.prepare(user, outdir=TMP_DIR)
    task = article_duplicate_report.ArticleDuplicateReportBackgroundTask(job)
    task.run()

    # Audit log records where the reports were written.
    audit_1 = job.audit.pop(0)
    assert audit_1.get('message', '').endswith(TMP_DIR)
    report_file = TMP_DIR + '/duplicate_articles_global_' + dates.today() + '.csv'
    assert os.path.exists(report_file)

    # The task cleans up its interim article csv.
    assert not os.path.exists(paths.rel2abs(__file__, 'tmp_article_duplicate_report'))

    # Report contents: one result line + headings; newest article listed first.
    with open(report_file) as f:
        csvlines = f.readlines()
    res = csvlines.pop()
    assert res.startswith(article2.id)  # article1 is article2's duplicate
    assert article1.id in res
    assert 'doi+fulltext' in res

    audit_2 = job.audit.pop(0)
    assert audit_2.get('message', '') == '2 articles processed for duplicates. 1 global duplicate sets found.'
def test_01_incoming_article_do(self):
    """Exercise IncomingArticleDO construction: blank, valid, pruned, and invalid payloads."""
    # A blank data object should construct cleanly.
    ia = IncomingArticleDO()

    # One from a full fixture payload.
    data = ArticleFixtureFactory.make_article_source()
    ia = IncomingArticleDO(data)

    # An author email (no longer an allowed field) should be silently pruned.
    data = ArticleFixtureFactory.make_article_source()
    data["bibjson"]["author"][0]["email"] = "*****@*****.**"
    ia = IncomingArticleDO(data)
    assert "*****@*****.**" not in ia.json()

    # A payload missing a required field must raise.
    data = ArticleFixtureFactory.make_article_source()
    del data["bibjson"]["title"]
    with self.assertRaises(DataStructureException):
        ia = IncomingArticleDO(data)

    # Now trip each of the conditionally-required / advanced validation rules.

    # No identifiers at all.
    data = ArticleFixtureFactory.make_article_source()
    data["bibjson"]["identifier"] = []
    with self.assertRaises(DataStructureException):
        ia = IncomingArticleDO(data)

    # Identifiers present but no issns among them.
    data["bibjson"]["identifier"] = [{"type": "wibble", "id": "alksdjfas"}]
    with self.assertRaises(DataStructureException):
        ia = IncomingArticleDO(data)

    # pissn and eissn identical once normalised.
    data["bibjson"]["identifier"] = [
        {"type": "pissn", "id": "12345678"},
        {"type": "eissn", "id": "1234-5678"},
    ]
    with self.assertRaises(DataStructureException):
        ia = IncomingArticleDO(data)

    # More keywords than the allowed maximum.
    data = ArticleFixtureFactory.make_article_source()
    data["bibjson"]["keywords"] = ["one", "two", "three", "four", "five", "six", "seven"]
    with self.assertRaises(DataStructureException):
        ia = IncomingArticleDO(data)
def setUp(self):
    """Build the accounts, journals and seed articles used by the create/update tests."""
    super(TestCreateOrUpdateArticle, self).setUp()

    # A publisher account (owns journal1) and an admin account.
    self.publisher = Account()
    self.publisher.add_role("publisher")
    self.publisher.save(blocking=True)

    self.admin = Account()
    self.admin.add_role("admin")
    self.admin.save(blocking=True)

    sources = JournalFixtureFactory.make_many_journal_sources(2, True)

    # journal1: owned by the publisher, issns 1111-1111 / 2222-2222.
    self.journal1 = Journal(**sources[0])
    self.journal1.set_owner(self.publisher.id)
    jbib1 = self.journal1.bibjson()
    jbib1.add_identifier(jbib1.P_ISSN, "1111-1111")
    jbib1.add_identifier(jbib1.E_ISSN, "2222-2222")
    self.journal1.save(blocking=True)
    self.publisher.add_journal(self.journal1)

    # journal2: unowned, issns 1234-5678 / 9876-5432.
    self.journal2 = Journal(**sources[1])
    jbib2 = self.journal2.bibjson()
    jbib2.add_identifier(jbib2.P_ISSN, "1234-5678")
    jbib2.add_identifier(jbib2.E_ISSN, "9876-5432")
    self.journal2.save(blocking=True)

    # Two articles in journal1 and one in journal2, each with a distinct DOI/fulltext.
    self.article10 = Article(**ArticleFixtureFactory.make_article_source(
        pissn="1111-1111", eissn="2222-2222",
        doi="10.0000/article-10", fulltext="https://www.article10.com"))
    self.article10.set_id("articleid10")
    self.article10.save(blocking=True)

    self.article11 = Article(**ArticleFixtureFactory.make_article_source(
        pissn="1111-1111", eissn="2222-2222",
        doi="10.0000/article-11", fulltext="https://www.article11.com"))
    self.article11.set_id("articleid11")
    self.article11.save(blocking=True)

    self.article2 = Article(**ArticleFixtureFactory.make_article_source(
        pissn="1234-5678", eissn="9876-5432",
        doi="10.0000/article-2", fulltext="https://www.article2.com"))
    self.article2.set_id("articleid2")
    self.article2.save(blocking=True)
def test_09_article(self):
    """test if the OAI-PMH article feed returns records and only displays articles accepted in DOAJ"""
    # NOTE(review): the pissn values previously carried a stray trailing comma
    # ('5678-5678,' / '8765-8765,') — corrected to well-formed ISSNs here.
    article_source = ArticleFixtureFactory.make_article_source(
        eissn='1234-1234', pissn='5678-5678', in_doaj=False)
    a_private = models.Article(**article_source)
    ba = a_private.bibjson()
    ba.title = "Private Article"
    a_private.save(blocking=True)

    article_source = ArticleFixtureFactory.make_article_source(
        eissn='4321-4321', pissn='8765-8765', in_doaj=True)
    a_public = models.Article(**article_source)
    ba = a_public.bibjson()
    ba.title = "Public Article"
    a_public.save(blocking=True)
    time.sleep(1)

    with self.app_test.test_request_context():
        with self.app_test.test_client() as t_client:
            resp = t_client.get(url_for('oaipmh.oaipmh', specified='article',
                                        verb='ListRecords', metadataPrefix='oai_dc'))
            assert resp.status_code == 200

            t = etree.fromstring(resp.data)
            records = t.xpath('/oai:OAI-PMH/oai:ListRecords', namespaces=self.oai_ns)

            # Check we only have one article returned (the private one is excluded)
            r = records[0].xpath('//oai:record', namespaces=self.oai_ns)
            assert len(r) == 1

            # check orcid_id xwalk
            assert str(records[0].xpath('//dc:creator/@id', namespaces=self.oai_ns)[0]) \
                == a_public.bibjson().author[0].get("orcid_id")
            # Check we have the correct article
            assert records[0].xpath('//dc:title', namespaces=self.oai_ns)[0].text \
                == a_public.bibjson().title

            resp = t_client.get(url_for('oaipmh.oaipmh', specified='article',
                                        verb='GetRecord', metadataPrefix='oai_dc')
                                + '&identifier=abcdefghijk_article')
            assert resp.status_code == 200

            t = etree.fromstring(resp.data)
            records = t.xpath('/oai:OAI-PMH/oai:GetRecord', namespaces=self.oai_ns)

            # Check we only have one record returned
            r = records[0].xpath('//oai:record', namespaces=self.oai_ns)
            assert len(r) == 1

            # Check we have the correct article
            assert records[0].xpath('//dc:title', namespaces=self.oai_ns)[0].text \
                == a_public.bibjson().title
def test_11_delete_article_fail(self):
    """Delete must fail without an account, with the wrong account, or on an unknown id."""
    # Fixture: an account owning a journal, plus one article created through the API.
    account = models.Account()
    account.set_id('test')
    account.set_name("Tester")
    account.set_email("*****@*****.**")

    journal = models.Journal(**JournalFixtureFactory.make_journal_source(in_doaj=True))
    journal.set_owner(account.id)
    journal.save()
    time.sleep(1)

    data = ArticleFixtureFactory.make_article_source()
    a = ArticlesCrudApi.create(data, account)  # saves to the index
    time.sleep(1)  # let the index catch up

    # No account at all -> 401.
    with self.assertRaises(Api401Error):
        ArticlesCrudApi.delete(a.id, None)

    # An account that does not own the article -> 404.
    account.set_id("other")
    with self.assertRaises(Api404Error):
        ArticlesCrudApi.delete(a.id, account)

    # The right account but a nonexistent id -> 404.
    account.set_id("test")
    with self.assertRaises(Api404Error):
        ArticlesCrudApi.delete("adfasdfhwefwef", account)
def test_01_withdraw_task(self):
    """Withdrawing journals via the background task also withdraws their articles."""
    sources = JournalFixtureFactory.make_many_journal_sources(10, in_doaj=True)

    ids = []
    articles = []
    for source in sources:
        j = models.Journal(**source)
        j.save()
        ids.append(j.id)

        # One article per journal, matching the journal's issns.
        jbib = j.bibjson()
        pissn = jbib.get_identifiers(jbib.P_ISSN)
        eissn = jbib.get_identifiers(jbib.E_ISSN)
        a = models.Article(**ArticleFixtureFactory.make_article_source(
            pissn=pissn[0], eissn=eissn[0], with_id=False))
        a.save()
        articles.append(a.id)

    time.sleep(2)

    job = SetInDOAJBackgroundTask.prepare("testuser", journal_ids=ids, in_doaj=False)
    SetInDOAJBackgroundTask.submit(job)
    time.sleep(2)

    # Every journal and every article should now be out of DOAJ.
    for id in ids:
        assert models.Journal.pull(id).is_in_doaj() is False
    for id in articles:
        assert models.Article.pull(id).is_in_doaj() is False
def setUp(self):
    """Seed journals (each with articles), forbidden accounts, and a managing-editor context."""
    super(TestTaskJournalBulkDelete, self).setUp()

    self.journals = []
    self.articles = []
    for j_src in JournalFixtureFactory.make_many_journal_sources(count=TEST_JOURNAL_COUNT):
        j = models.Journal(**j_src)
        self.journals.append(j)
        j.save()
        # Attach the configured number of articles to each journal via its issns.
        for _ in range(0, TEST_ARTICLES_PER_JOURNAL):
            a = models.Article(**ArticleFixtureFactory.make_article_source(
                with_id=False,
                eissn=j.bibjson().first_eissn,
                pissn=j.bibjson().first_pissn))
            a.save()
            self.articles.append(a)
    sleep(2)

    # Accounts whose roles must NOT be allowed to bulk-delete journals.
    self.forbidden_accounts = [
        AccountFixtureFactory.make_editor_source()['id'],
        AccountFixtureFactory.make_assed1_source()['id'],
        AccountFixtureFactory.make_assed2_source()['id'],
        AccountFixtureFactory.make_assed3_source()['id'],
    ]

    self._make_and_push_test_context(
        acc=models.Account(**AccountFixtureFactory.make_managing_editor_source()))
def test_02_create_duplicate_articles(self):
    """Bulk-creating 10 identical articles must fail and leave nothing in the index."""
    # Ten copies of the same incoming article payload.
    data = ArticleFixtureFactory.make_incoming_api_article()
    dataset = [data] * 10

    # The account performing the create, with a journal attached.
    account = models.Account()
    account.set_id("test")
    account.set_name("Tester")
    account.set_email("*****@*****.**")

    journal = models.Journal(**JournalFixtureFactory.make_journal_source(in_doaj=True))
    journal.set_owner(account.id)
    journal.save()
    time.sleep(2)

    # Duplicate detection should reject the whole batch.
    with self.assertRaises(Api400Error):
        ids = ArticlesBulkApi.create(dataset, account)
    time.sleep(2)

    # Nothing was written, so the article mapping should not even exist.
    with self.assertRaises(ESMappingMissingError):
        all_articles = models.Article.all()
def test_04_article_structure_exceptions(self):
    """Malformed article payloads to the API return 400 with a JSON error body."""
    user_data = ArticleFixtureFactory.make_article_source()
    create_url = '/api/v1/articles?api_key=' + self.api_key

    with self.app_test.test_client() as t_client:
        # log into the app as our user
        self.login(t_client, 'test', 'password123')

        # CREATE with invalid JSON -> 400 "Bad Request".
        bad_data = json.dumps(user_data) + 'blarglrandomblah'
        response = t_client.post(create_url, data=bad_data)
        assert response.status_code == 400
        assert response.mimetype == 'application/json'
        assert 'Supplied data was not valid JSON' in response.json['error']

        # CREATE with too many keywords (exception propagates from DataObj).
        too_many_kwds = deepcopy(user_data)
        too_many_kwds['bibjson']['keywords'] = ['one', 'two', 'three', 'four', 'five', 'six', 'SEVEN']
        response = t_client.post(create_url, data=json.dumps(too_many_kwds))
        assert response.status_code == 400
        assert response.mimetype == 'application/json'
        assert 'maximum of 6 keywords' in response.json['error']

        # CREATE with a missing required field (exception propagates from DataObj).
        missing_title = deepcopy(user_data)
        del missing_title['bibjson']['title']
        response = t_client.post(create_url, data=json.dumps(missing_title))
        assert response.status_code == 400
        assert response.mimetype == 'application/json'
        assert "Field 'title' is required but not present" in response.json['error']
def test_10_delete_article_success(self):
    """An article deleted by its owner is removed from the index."""
    # Fixture: owner account with a journal, plus one article created via the API.
    account = models.Account()
    account.set_id('test')
    account.set_name("Tester")
    account.set_email("*****@*****.**")

    journal = models.Journal(**JournalFixtureFactory.make_journal_source(in_doaj=True))
    journal.set_owner(account.id)
    journal.save()
    time.sleep(1)

    data = ArticleFixtureFactory.make_article_source()
    a = ArticlesCrudApi.create(data, account)  # saves to the index
    time.sleep(1)  # index catch-up

    ArticlesCrudApi.delete(a.id, account)
    time.sleep(1)  # index catch-up

    # The article should no longer be retrievable.
    assert models.Article.pull(a.id) is None
def get_duplicate(cls, return_none=False, given_article_id=None, eissn=None, pissn=None, doi=None, fulltext=None, merge_conflict=False):
    """Build a mock duplicate-lookup function.

    The returned callable mimics the duplicate-detection service: it raises
    ArticleMergeConflict when merge_conflict is set, returns a canned Article
    (built from the given identifiers) either unconditionally or only when the
    supplied article's id matches given_article_id, and otherwise returns None.
    """
    article = None
    if not return_none and not merge_conflict:
        source = ArticleFixtureFactory.make_article_source(
            eissn=eissn, pissn=pissn, doi=doi, fulltext=fulltext)
        article = Article(**source)
        article.set_id()

    def mock(*args, **kwargs):
        if merge_conflict:
            raise ArticleMergeConflict()
        candidate = args[0]
        if given_article_id is None:
            return article
        if given_article_id == candidate.id:
            return article
        # No id match: fall through, implicitly returning None.

    return mock
def test_03_create_articles_fail(self):
    """Bulk create fails atomically: bad account -> 401, bad data -> 400, index stays empty."""
    # A dud (None) account must be rejected outright.
    with self.assertRaises(Api401Error):
        data = ArticleFixtureFactory.make_incoming_api_article()
        dataset = [data] * 10
        ids = ArticlesBulkApi.create(dataset, None)

    # Nothing should have been created.
    all = [x for x in models.Article.iterall()]
    assert len(all) == 0

    # A valid account but a corrupt record in the batch must also be rejected.
    with self.assertRaises(Api400Error):
        account = models.Account()
        account.set_id("test")
        account.set_name("Tester")
        account.set_email("*****@*****.**")

        # add a journal to the account
        journal = models.Journal(**JournalFixtureFactory.make_journal_source(in_doaj=True))
        journal.set_owner(account.id)
        journal.save()
        time.sleep(1)

        # Inject junk into the middle of the batch.
        dataset = dataset[:5] + [{"some": {"junk": "data"}}] + dataset[5:]
        ids = ArticlesBulkApi.create(dataset, account)

    # Again, the failure must not leave partial articles behind.
    all = [x for x in models.Article.iterall()]
    assert len(all) == 0
def test_02_create_duplicate_articles(self):
    """A batch of 10 identical articles is rejected and nothing reaches the index."""
    # set up all the bits we need - 10 copies of one article payload
    dataset = [ArticleFixtureFactory.make_incoming_api_article()] * 10

    # the account we'll perform the create as
    account = models.Account()
    account.set_id("test")
    account.set_name("Tester")
    account.set_email("*****@*****.**")

    # give that account a journal
    journal = models.Journal(**JournalFixtureFactory.make_journal_source(in_doaj=True))
    journal.set_owner(account.id)
    journal.save()
    time.sleep(2)

    # the bulk create should trip duplicate detection
    with self.assertRaises(Api400Error):
        ids = ArticlesBulkApi.create(dataset, account)
    time.sleep(2)

    # no articles were written, so the mapping is missing entirely
    with self.assertRaises(ESMappingMissingError):
        all_articles = models.Article.all()
def test_03_withdraw(self):
    """change_in_doaj(..., False) withdraws journals and their articles."""
    acc = models.Account()
    acc.set_name("testuser")
    ctx = self._make_and_push_test_context(acc=acc)

    sources = JournalFixtureFactory.make_many_journal_sources(10, in_doaj=True)
    ids = []
    articles = []
    for source in sources:
        j = models.Journal(**source)
        j.save()
        ids.append(j.id)

        # Create one matching article per journal.
        jbib = j.bibjson()
        pissn = jbib.get_identifiers(jbib.P_ISSN)
        eissn = jbib.get_identifiers(jbib.E_ISSN)
        a = models.Article(**ArticleFixtureFactory.make_article_source(
            pissn=pissn[0], eissn=eissn[0], with_id=False))
        a.save()
        articles.append(a.id)

    time.sleep(2)
    change_in_doaj(ids, False)
    time.sleep(2)

    for id in ids:
        assert models.Journal.pull(id).is_in_doaj() is False
    for id in articles:
        assert models.Article.pull(id).is_in_doaj() is False

    ctx.pop()
def test_02_reinstate_task(self):
    """Reinstating journals via the background task also reinstates their articles."""
    sources = JournalFixtureFactory.make_many_journal_sources(10, in_doaj=False)

    ids = []
    articles = []
    for source in sources:
        j = models.Journal(**source)
        j.save()
        ids.append(j.id)

        # One withdrawn article per journal, matching the journal's issns.
        jbib = j.bibjson()
        pissn = jbib.get_identifiers(jbib.P_ISSN)
        eissn = jbib.get_identifiers(jbib.E_ISSN)
        a = models.Article(**ArticleFixtureFactory.make_article_source(
            pissn=pissn[0], eissn=eissn[0], with_id=False, in_doaj=False))
        a.save()
        articles.append(a.id)

    time.sleep(2)

    job = SetInDOAJBackgroundTask.prepare("testuser", journal_ids=ids, in_doaj=True)
    SetInDOAJBackgroundTask.submit(job)
    time.sleep(2)

    # Everything should now be back in DOAJ.
    for id in ids:
        assert models.Journal.pull(id).is_in_doaj() is True
    for id in articles:
        assert models.Article.pull(id).is_in_doaj() is True
def test_03_create_articles_fail(self):
    """Bulk create with a missing account or junk data fails without partial writes."""
    # if the account is dud
    with self.assertRaises(Api401Error):
        data = ArticleFixtureFactory.make_incoming_api_article()
        dataset = [data] * 10
        ids = ArticlesBulkApi.create(dataset, None)

    # the index must be empty - none of them should have been made
    all = [x for x in models.Article.iterall()]
    assert len(all) == 0

    # if the data is bust
    with self.assertRaises(Api400Error):
        account = models.Account()
        account.set_id("test")
        account.set_name("Tester")
        account.set_email("*****@*****.**")

        # attach a journal to the account
        journal = models.Journal(**JournalFixtureFactory.make_journal_source(in_doaj=True))
        journal.set_owner(account.id)
        journal.save()
        time.sleep(1)

        # splice a junk record into the middle of the batch
        dataset = dataset[:5] + [{"some": {"junk": "data"}}] + dataset[5:]
        ids = ArticlesBulkApi.create(dataset, account)

    # still nothing in the index
    all = [x for x in models.Article.iterall()]
    assert len(all) == 0
def test_07_retrieve_article_fail(self):
    """Retrieving a non-public article fails for anonymous, wrong, or bogus lookups."""
    # Fixture: an account with a journal, plus an article explicitly NOT in DOAJ.
    account = models.Account()
    account.set_id('test')
    account.set_name("Tester")
    account.set_email("*****@*****.**")

    journal = models.Journal(**JournalFixtureFactory.make_journal_source(in_doaj=True))
    journal.set_owner(account.id)
    journal.save()
    time.sleep(1)

    data = ArticleFixtureFactory.make_article_source()
    data['admin']['in_doaj'] = False
    ap = models.Article(**data)
    ap.save()
    time.sleep(1)

    # Anonymous retrieval of a non-DOAJ article -> 401.
    with self.assertRaises(Api401Error):
        a = ArticlesCrudApi.retrieve(ap.id, None)

    # A different (non-owning) user -> 404.
    account = models.Account()
    account.set_id("asdklfjaioefwe")
    with self.assertRaises(Api404Error):
        a = ArticlesCrudApi.retrieve(ap.id, account)

    # A nonexistent article id -> 404.
    account = models.Account()
    account.set_id(ap.id)
    with self.assertRaises(Api404Error):
        a = ArticlesCrudApi.retrieve("ijsidfawefwefw", account)
def test_11_delete_article_fail(self):
    """Deletion is refused for anonymous callers, non-owners, and unknown ids."""
    # set up all the bits we need
    account = models.Account()
    account.set_id('test')
    account.set_name("Tester")
    account.set_email("*****@*****.**")

    journal = models.Journal(**JournalFixtureFactory.make_journal_source(in_doaj=True))
    journal.set_owner(account.id)
    journal.save()
    time.sleep(1)

    # create the article through the API (saves it to the index)
    data = ArticleFixtureFactory.make_article_source()
    a = ArticlesCrudApi.create(data, account)
    time.sleep(1)  # let the index catch up

    # no account -> 401
    with self.assertRaises(Api401Error):
        ArticlesCrudApi.delete(a.id, None)

    # wrong account -> 404
    account.set_id("other")
    with self.assertRaises(Api404Error):
        ArticlesCrudApi.delete(a.id, account)

    # right account, wrong id -> 404
    account.set_id("test")
    with self.assertRaises(Api404Error):
        ArticlesCrudApi.delete("adfasdfhwefwef", account)
def test_has_permissions(self):
    """Ownership check: owner and admin may touch the article, a stranger may not."""
    journal1 = Journal(**JournalFixtureFactory.make_journal_source())

    # Three actors: the journal's owner, an unrelated publisher, and an admin.
    publisher_owner = Account(**AccountFixtureFactory.make_publisher_source())
    publisher_stranged = Account(**AccountFixtureFactory.make_publisher_source())
    admin = Account(**AccountFixtureFactory.make_managing_editor_source())

    journal1.set_owner(publisher_owner)
    journal1.save(blocking=True)

    eissn = journal1.bibjson().get_one_identifier("eissn")
    pissn = journal1.bibjson().get_one_identifier("pissn")
    article = Article(**ArticleFixtureFactory.make_article_source(eissn=eissn, pissn=pissn))

    # With the permission flag off, everyone passes; with it on, ownership matters.
    assert self.svc.has_permissions(publisher_stranged, article, False)
    assert self.svc.has_permissions(publisher_owner, article, True)
    assert self.svc.has_permissions(admin, article, True)

    failed_result = self.svc.has_permissions(publisher_stranged, article, True)
    assert failed_result == {'success': 0, 'fail': 1, 'update': 0, 'new': 0,
                             'shared': [], 'unowned': [pissn, eissn],
                             'unmatched': []}, "received: {}".format(failed_result)
def test_10_delete_article_success(self):
    """A successful delete removes the article from the index."""
    # set up all the bits we need
    account = models.Account()
    account.set_id('test')
    account.set_name("Tester")
    account.set_email("*****@*****.**")

    journal = models.Journal(**JournalFixtureFactory.make_journal_source(in_doaj=True))
    journal.set_owner(account.id)
    journal.save()
    time.sleep(1)

    # create the article via the API (saves it to the index)
    data = ArticleFixtureFactory.make_article_source()
    a = ArticlesCrudApi.create(data, account)
    time.sleep(1)  # let the index catch up

    # delete it, wait for the index again, then confirm it is gone
    ArticlesCrudApi.delete(a.id, account)
    time.sleep(1)
    ap = models.Article.pull(a.id)
    assert ap is None
def test_07_retrieve_article_fail(self):
    """Non-DOAJ articles cannot be retrieved anonymously, by strangers, or via bad ids."""
    # an account with a journal attached
    account = models.Account()
    account.set_id('test')
    account.set_name("Tester")
    account.set_email("*****@*****.**")

    journal = models.Journal(**JournalFixtureFactory.make_journal_source(in_doaj=True))
    journal.set_owner(account.id)
    journal.save()
    time.sleep(1)

    # an article flagged out of DOAJ
    data = ArticleFixtureFactory.make_article_source()
    data['admin']['in_doaj'] = False
    ap = models.Article(**data)
    ap.save()
    time.sleep(1)

    # no user + in_doaj False -> 401
    with self.assertRaises(Api401Error):
        a = ArticlesCrudApi.retrieve(ap.id, None)

    # wrong user -> 404
    account = models.Account()
    account.set_id("asdklfjaioefwe")
    with self.assertRaises(Api404Error):
        a = ArticlesCrudApi.retrieve(ap.id, account)

    # non-existant article -> 404
    account = models.Account()
    account.set_id(ap.id)
    with self.assertRaises(Api404Error):
        a = ArticlesCrudApi.retrieve("ijsidfawefwefw", account)
def test_prepare_update_admin(self, value, kwargs):
    """Parameterised check of _prepare_update_admin across duplicate/merge scenarios."""
    # Patch out merge/pull so no index access happens.
    Article.merge = BLLArticleMockFactory.merge_mock
    Article.pull = BLLArticleMockFactory.pull_mock

    # Unpack the scenario parameters.
    duplicate_arg = kwargs.get("duplicate")
    merge_duplicate_arg = kwargs.get("merge_duplicate")
    is_update_arg = kwargs.get("is_update")
    raises_arg = kwargs.get("raises")

    pissn1 = "1234-5678"
    eissn1 = "9876-5432"
    pissn2 = "1111-1111"
    eissn2 = "2222-2222"
    doi = "10.1234/article-10"
    ft = "https://example.com"
    update_article_id = "update_id"

    # The article being prepared for update.
    article = Article(**ArticleFixtureFactory.make_article_source(
        pissn=pissn1, eissn=eissn1, doi=doi, fulltext=ft))
    article.set_id("article_id")

    # Optionally a duplicate, whose id may or may not match the update target.
    duplicate = None
    if duplicate_arg != "none":
        duplicate = Article(**ArticleFixtureFactory.make_article_source(
            pissn=pissn2, eissn=eissn2, doi=doi, fulltext=ft))
        if duplicate_arg == "same_as_update_article_id":
            duplicate.set_id("update_id")
        elif duplicate_arg == "different_then_update_article_id":
            duplicate.set_id("duplicate_id")

    merge_duplicate = True if merge_duplicate_arg == "yes" else False

    if raises_arg == "DuplicateArticle":
        with self.assertRaises(exceptions.DuplicateArticleException):
            self.svc._prepare_update_admin(article, duplicate, update_article_id, merge_duplicate)
    else:
        assert self.svc._prepare_update_admin(
            article, duplicate, update_article_id, merge_duplicate) == int(is_update_arg)
def test_prepare_update_publisher(self, value, kwargs):
    """Parameterised check of _prepare_update_publisher across duplicate/permission scenarios."""
    # Patch merge so no index access happens.
    Article.merge = BLLArticleMockFactory.merge_mock

    # Unpack the scenario parameters.
    duplicate_arg = kwargs.get("duplicate")
    merge_duplicate_arg = kwargs.get("merge_duplicate")
    doi_or_ft_update_arg = kwargs.get("doi_or_ft_updated")
    is_update_arg = kwargs.get("is_update")
    raises_arg = kwargs.get("raises")

    pissn1 = "1234-5678"
    eissn1 = "9876-5432"
    pissn2 = "1111-1111"
    eissn2 = "2222-2222"
    doi = "10.1234/article-10"
    ft = "https://example.com"

    # Mock whether the DOI / fulltext are considered changed.
    if doi_or_ft_update_arg == "yes":
        self.svc._doi_or_fulltext_updated = BLLArticleMockFactory.doi_or_fulltext_updated(True, True)
    else:
        self.svc._doi_or_fulltext_updated = BLLArticleMockFactory.doi_or_fulltext_updated(False, False)

    article = Article(**ArticleFixtureFactory.make_article_source(
        pissn=pissn1, eissn=eissn1, doi=doi, fulltext=ft))
    article.set_id("article_id")

    # Optionally a duplicate, matching or differing from the article's id.
    duplicate = None
    if duplicate_arg != "none":
        duplicate = Article(**ArticleFixtureFactory.make_article_source(
            pissn=pissn2, eissn=eissn2, doi=doi, fulltext=ft))
        if duplicate_arg == "same_as_article_id":
            duplicate.set_id("article_id")
        elif duplicate_arg == "different_than_article_id":
            duplicate.set_id("duplicate_id")

    merge_duplicate = True if merge_duplicate_arg == "yes" else False

    # A foreign duplicate implies the publisher lacks permission over it.
    if duplicate_arg == "different_than_article_id":
        self.svc.has_permissions = BLLArticleMockFactory.has_permissions(False)
    else:
        self.svc.has_permissions = BLLArticleMockFactory.has_permissions(True)

    if raises_arg == "DuplicateArticle":
        with self.assertRaises(exceptions.DuplicateArticleException):
            self.svc._prepare_update_publisher(article, duplicate, merge_duplicate, self.publisher, True)
    else:
        assert self.svc._prepare_update_publisher(
            article, duplicate, merge_duplicate, self.publisher, True) == int(is_update_arg)
def test_10_scroll(self):
    """Scroll queries also honour pre-filters (DOAJ-only) and post-filters (scrubbed admin data)."""
    # Make 4 articles: 3 in DOAJ, 1 not in DOAJ; only the 3 public ones may surface.
    qsvc = QueryService()

    articles = []
    for _ in range(0, 3):
        art = models.Article(**ArticleFixtureFactory.make_article_source(with_id=False))
        articles.append(art)
        assert art.publisher_record_id() == 'some_identifier'
        art.save(blocking=True)

    hidden = models.Article(**ArticleFixtureFactory.make_article_source(with_id=False, in_doaj=False))
    articles.append(hidden)
    hidden.save(blocking=True)

    q = {"query": {"match_all": {}}}
    for res in qsvc.scroll('api_query', 'article', q, None, None):
        am = models.Article(**res)
        # Post-filter must have stripped the non-public publisher record id.
        assert am.publisher_record_id() is None, am.publisher_record_id()
def test_14_article_model_index(self):
    """Check article indexes generate"""
    a = models.Article(**ArticleFixtureFactory.make_article_source())
    # A fresh article has no index section until prep() builds it.
    assert a.data.get('index', None) is None
    a.prep()
    assert a.data.get('index', None) is not None
def test_33_article_stats(self):
    """article_stats counts only in-DOAJ articles for the journal's ISSN."""
    # Build articles variably in/out of DOAJ and for/against the target ISSN,
    # each with a distinct created date so "latest" is well-defined.
    articles = []
    for i in range(1, 3):  # in DOAJ, matching ISSN
        article = models.Article(**ArticleFixtureFactory.make_article_source(
            eissn="1111-1111", pissn="1111-1111", with_id=False, in_doaj=True))
        article.set_created("2019-01-0" + str(i) + "T00:00:00Z")
        articles.append(article)
    for i in range(3, 5):  # out of DOAJ, matching ISSN
        article = models.Article(**ArticleFixtureFactory.make_article_source(
            eissn="1111-1111", pissn="1111-1111", with_id=False, in_doaj=False))
        article.set_created("2019-01-0" + str(i) + "T00:00:00Z")
        articles.append(article)
    for i in range(5, 7):  # in DOAJ, other ISSN
        article = models.Article(**ArticleFixtureFactory.make_article_source(
            eissn="2222-2222", pissn="2222-2222", with_id=False, in_doaj=True))
        article.set_created("2019-01-0" + str(i) + "T00:00:00Z")
        articles.append(article)
    for i in range(7, 9):  # out of DOAJ, other ISSN
        article = models.Article(**ArticleFixtureFactory.make_article_source(
            eissn="2222-2222", pissn="2222-2222", with_id=False, in_doaj=False))
        article.set_created("2019-01-0" + str(i) + "T00:00:00Z")
        articles.append(article)

    # Save all; block only on the final write so the whole set is indexed.
    last = len(articles) - 1
    for idx, art in enumerate(articles):
        art.save(blocking=(idx == last))

    journal = models.Journal()
    bj = journal.bibjson()
    bj.add_identifier(bj.P_ISSN, "1111-1111")

    # Only the 2 in-DOAJ articles on 1111-1111 count; latest is the 2nd of Jan.
    stats = journal.article_stats()
    assert stats.get("total") == 2
    assert stats.get("latest") == "2019-01-02T00:00:00Z"
def test_01_create_articles_success(self):
    """Bulk-creating 10 distinct articles yields 10 unique ids, all retrievable."""

    def find_dict_in_list(lst, key, value):
        # Index of the first dict in lst whose [key] == value, or -1.
        for i, dic in enumerate(lst):
            if dic[key] == value:
                return i
        return -1

    # Build 10 articles, varying DOI and fulltext URL so duplicate detection
    # does not collapse them.
    dataset = []
    for i in range(1, 11):
        data = ArticleFixtureFactory.make_incoming_api_article()

        doi_ix = find_dict_in_list(data['bibjson']['identifier'], 'type', 'doi')
        if doi_ix == -1:
            data['bibjson']['identifier'].append({"type": "doi"})
        # NOTE: when doi_ix is -1 the write below targets index -1, which is
        # exactly the entry just appended — intentional here.
        data['bibjson']['identifier'][doi_ix]['id'] = '10.0000/SOME.IDENTIFIER.{0}'.format(i)

        fulltext_url_ix = find_dict_in_list(data['bibjson']['link'], 'type', 'fulltext')
        if fulltext_url_ix == -1:
            data['bibjson']['link'].append({"type": "fulltext"})
        data['bibjson']['link'][fulltext_url_ix]['url'] = 'http://www.example.com/article_{0}'.format(i)

        dataset.append(deepcopy(data))

    # The account performing the create, with a journal attached.
    account = models.Account()
    account.set_id("test")
    account.set_name("Tester")
    account.set_email("*****@*****.**")

    journal = models.Journal(**JournalFixtureFactory.make_journal_source(in_doaj=True))
    journal.set_owner(account.id)
    journal.save()
    time.sleep(2)

    ids = ArticlesBulkApi.create(dataset, account)

    # 10 ids back, all distinct.
    assert len(ids) == 10
    assert len(list(set(ids))) == 10, len(list(set(ids)))

    time.sleep(2)  # let the index catch up

    # Every id must resolve to a stored article.
    for id in ids:
        s = models.Article.pull(id)
        assert s is not None
def discover_duplicates(cls, doi_duplicates=0, fulltext_duplicates=0, overlap=0):
    """Build a mock replacement for the duplicate-discovery service.

    Creates `doi_duplicates` articles sharing one DOI and
    `fulltext_duplicates` articles sharing one fulltext URL, with `overlap`
    articles belonging to both sets. Returns a function with the signature
    of the real discovery call that always answers with these articles.

    Fix: the article fixture was built with fulltext=["fulltext"] - a
    literal list containing the string "fulltext" - instead of the
    per-record URL prepared in ident["fulltext"], so the unique/duplicated
    fulltext URLs set up above were never actually used.
    """
    if overlap > doi_duplicates or overlap > fulltext_duplicates:
        raise Exception(
            "overlap must be the same as or less than either of doi_duplicates or fulltext_duplicates"
        )

    idents = []

    # first make duplicate records for the total number of desired dois
    for i in range(doi_duplicates):
        idents.append({
            "doi_domain": True,
            "doi": "10.1234/abc/1",
            "fulltext": "http://example.com/unique/" + str(i)
        })

    # mark the first `overlap` DOI-duplicates as fulltext-duplicates too
    for i in range(overlap):
        idents[i]["fulltext"] = "http://example.com/1"
        idents[i]["fulltext_domain"] = True

    # then any remaining fulltext-only duplicates
    remaining_fulltexts = fulltext_duplicates - overlap
    for i in range(remaining_fulltexts):
        idents.append({
            "fulltext_domain": True,
            "doi": "10.1234/unique/" + str(i),
            "fulltext": "http://example.com/1"
        })

    possible_duplicates = {"doi": [], "fulltext": []}
    for i, ident in enumerate(idents):
        source = ArticleFixtureFactory.make_article_source(
            eissn="1234-5678",
            pissn="9876-5432",
            doi=ident["doi"],
            fulltext=ident["fulltext"])  # was: fulltext=["fulltext"]
        article = Article(**source)
        article.set_id()
        # spread last_updated so ordering by recency is deterministic
        article.data["last_updated"] = datetime.fromtimestamp(
            i * 100000).strftime("%Y-%m-%dT%H:%M:%SZ")

        if "doi_domain" in ident:
            possible_duplicates["doi"].append(article)
        if "fulltext_domain" in ident:
            possible_duplicates["fulltext"].append(article)

    # drop empty match-type buckets, as the real service would
    if len(possible_duplicates["doi"]) == 0:
        del possible_duplicates["doi"]
    if len(possible_duplicates["fulltext"]) == 0:
        del possible_duplicates["fulltext"]

    def mock(article, owner=None, results_per_match_type=10):
        # the real service signature; always answers with the fixed set
        return possible_duplicates

    return mock
def test_01_incoming_article_do(self):
    """IncomingArticleDO construction: blank, valid, pruned, and invalid inputs."""
    # a blank object should construct without complaint
    ia = IncomingArticleDO()

    # a valid fixture should also be accepted
    ia = IncomingArticleDO(ArticleFixtureFactory.make_article_source())

    # an author email is no longer an allowed field - it should be silently pruned
    data = ArticleFixtureFactory.make_article_source()
    data["bibjson"]["author"][0]["email"] = "*****@*****.**"
    ia = IncomingArticleDO(data)
    assert "*****@*****.**" not in ia.json()

    # a record missing its title is structurally broken
    data = ArticleFixtureFactory.make_article_source()
    del data["bibjson"]["title"]
    with self.assertRaises(DataStructureException):
        ia = IncomingArticleDO(data)

    # now progressively remove the conditionally required/advanced validation stuff
    #
    # missing identifiers
    data = ArticleFixtureFactory.make_article_source()
    data["bibjson"]["identifier"] = []
    with self.assertRaises(DataStructureException):
        ia = IncomingArticleDO(data)

    # no issns specified
    data["bibjson"]["identifier"] = [{"type": "wibble", "id": "alksdjfas"}]
    with self.assertRaises(DataStructureException):
        ia = IncomingArticleDO(data)

    # issns the same (but not normalised the same)
    data["bibjson"]["identifier"] = [{"type": "pissn", "id": "12345678"},
                                     {"type": "eissn", "id": "1234-5678"}]
    with self.assertRaises(DataStructureException):
        ia = IncomingArticleDO(data)

    # too many keywords
    data = ArticleFixtureFactory.make_article_source()
    data["bibjson"]["keywords"] = ["one", "two", "three", "four", "five", "six", "seven"]
    with self.assertRaises(DataStructureException):
        ia = IncomingArticleDO(data)
def test_02_toc_requirements(self):
    """Check what we need for ToCs are in the article models"""
    article = models.Article(**ArticleFixtureFactory.make_article_source())
    article.prep()

    # A ToC needs a volume, an issue, a year and a month to be built.
    expected_date = "1991-01-01T00:00:00Z"
    assert article.data['bibjson']['journal']['volume'] == '1'
    assert article.data['bibjson']['journal']['number'] == '99'
    assert article.data['index']['date'] == expected_date
    assert article.data['index']['date_toc_fv_month'] == article.data['index']['date'] == expected_date
def test_09_update_article_fail(self):
    """Updates must be rejected without auth, for the wrong owner, or on a bad id."""
    # build the account and journal the article will belong to
    account = models.Account()
    account.set_id('test')
    account.set_name("Tester")
    account.set_email("*****@*****.**")
    journal = models.Journal(**JournalFixtureFactory.make_journal_source(in_doaj=True))
    journal.set_owner(account.id)
    journal.save()
    time.sleep(1)

    # create the article we will attempt to update
    a = ArticlesCrudApi.create(ArticleFixtureFactory.make_article_source(), account)
    time.sleep(1)  # let the index catch up

    # keep a copy of the newly created version for use in assertions later
    created = models.Article.pull(a.id)

    # an updated version of the object
    data = ArticleFixtureFactory.make_article_source()
    data["bibjson"]["title"] = "An updated title"

    # no account at all -> 401
    with self.assertRaises(Api401Error):
        ArticlesCrudApi.update(a.id, data, None)

    # an account that doesn't own the article -> 404
    account.set_id("other")
    with self.assertRaises(Api404Error):
        ArticlesCrudApi.update(a.id, data, account)

    # the right account but a nonexistent article id -> 404
    account.set_id("test")
    with self.assertRaises(Api404Error):
        ArticlesCrudApi.update("adfasdfhwefwef", data, account)
def test_09_search(self):
    """Pre-filters restrict search to in-DOAJ articles; post-filters strip non-public data."""
    qsvc = QueryService()

    # 3 in-DOAJ articles plus one that is not in DOAJ
    articles = []
    for _ in range(3):
        art = models.Article(**ArticleFixtureFactory.make_article_source(with_id=False))
        assert art.publisher_record_id() == 'some_identifier'
        art.save(blocking=True)
        articles.append(art)
    outsider = models.Article(**ArticleFixtureFactory.make_article_source(with_id=False, in_doaj=False))
    outsider.save(blocking=True)
    articles.append(outsider)

    res = qsvc.search('query', 'article', {"query": {"match_all": {}}},
                      account=None, additional_parameters={})

    # only the 3 in-DOAJ articles should be returned
    assert res['hits']['total'] == 3, res['hits']['total']
    # and the publisher record id must have been scrubbed from each hit
    for hit in res['hits']['hits']:
        am = models.Article(**hit)
        assert am.publisher_record_id() is None, am.publisher_record_id()
def test_09_update_article_fail(self):
    """Exercise the failure modes of ArticlesCrudApi.update."""
    # owner account plus its journal
    owner = models.Account()
    owner.set_id('test')
    owner.set_name("Tester")
    owner.set_email("*****@*****.**")
    journal = models.Journal(**JournalFixtureFactory.make_journal_source(in_doaj=True))
    journal.set_owner(owner.id)
    journal.save()
    time.sleep(1)

    # make the article we'll try to update, then let the index catch up
    a = ArticlesCrudApi.create(ArticleFixtureFactory.make_article_source(), owner)
    time.sleep(1)

    # snapshot of the created record, kept for later comparison
    created = models.Article.pull(a.id)

    update_data = ArticleFixtureFactory.make_article_source()
    update_data["bibjson"]["title"] = "An updated title"

    # anonymous update attempt
    with self.assertRaises(Api401Error):
        ArticlesCrudApi.update(a.id, update_data, None)

    # update attempt from a different account id
    owner.set_id("other")
    with self.assertRaises(Api404Error):
        ArticlesCrudApi.update(a.id, update_data, owner)

    # update attempt against an id that doesn't exist
    owner.set_id("test")
    with self.assertRaises(Api404Error):
        ArticlesCrudApi.update("adfasdfhwefwef", update_data, owner)
def test_04_coerce(self):
    """Type coercion in IncomingArticleDO: valid values pass, bad ones raise."""
    # successes: a valid url, link type, bool and date all coerce cleanly
    data = ArticleFixtureFactory.make_article_source()
    data["bibjson"]["link"][0]["url"] = "http://www.example.com/this_location/here"  # protocol required
    data["bibjson"]["link"][0]["type"] = "fulltext"
    data["admin"]["in_doaj"] = False
    data["created_date"] = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
    ia = IncomingArticleDO(data)
    assert isinstance(ia.bibjson.title, unicode)

    # now test some failures
    # a url which is not a url at all
    data = ArticleFixtureFactory.make_article_source()
    data["bibjson"]["link"][0]["url"] = "Two streets down on the left"
    with self.assertRaises(DataStructureException):
        ia = IncomingArticleDO(data)

    # a url missing its protocol
    data["bibjson"]["link"][0]["url"] = "www.example.com/this_location/here"
    with self.assertRaises(DataStructureException):
        ia = IncomingArticleDO(data)

    # a link type outside the allowed set
    data = ArticleFixtureFactory.make_article_source()
    data["bibjson"]["link"][0]["type"] = "cheddar"
    with self.assertRaises(DataStructureException):
        ia = IncomingArticleDO(data)

    # a string where a bool is expected
    data = ArticleFixtureFactory.make_article_source()
    data["admin"]["in_doaj"] = "Yes"
    with self.assertRaises(DataStructureException):
        ia = IncomingArticleDO(data)

    # an unparseable date
    data = ArticleFixtureFactory.make_article_source()
    data["created_date"] = "Just yesterday"
    with self.assertRaises(DataStructureException):
        ia = IncomingArticleDO(data)
def test_05_delete_articles_fail(self):
    """Bulk delete must be refused without auth, for non-owners, and for bad ids."""
    # ten distinct articles
    dataset = [ArticleFixtureFactory.make_incoming_api_article(
        doi="10.123/test/" + str(i), fulltext="http://example.com/" + str(i))
        for i in range(10)]

    # the main account we're going to work as
    article_owner = models.Account()
    article_owner.set_id("test")
    article_owner.set_name("Tester")
    article_owner.set_email("*****@*****.**")

    # a second account which will own the articles, so the one above is
    # "another user" trying to delete our precious articles
    somebody_else = models.Account()
    somebody_else.set_id("somebody_else")
    somebody_else.set_name("Somebody Else")
    somebody_else.set_email("*****@*****.**")

    # a journal owned by the article owner links the account to the articles
    journal = models.Journal(**JournalFixtureFactory.make_journal_source(in_doaj=True))
    journal.set_owner(article_owner.id)
    journal.save()
    time.sleep(1)

    ids = ArticlesBulkApi.create(dataset, article_owner)
    time.sleep(2)  # let the index catch up

    # no account -> 401
    with self.assertRaises(Api401Error):
        ArticlesBulkApi.delete(ids, None)

    # the wrong account -> 400
    article_owner.set_id("other")
    with self.assertRaises(Api400Error):
        ArticlesBulkApi.delete(ids, somebody_else)

    # a bad id in the batch -> 400 (twice, to show the failure is stable)
    ids.append("adfasdfhwefwef")
    article_owner.set_id("test")
    with self.assertRaises(Api400Error):
        ArticlesBulkApi.delete(ids, article_owner)
    with self.assertRaises(Api400Error):
        ArticlesBulkApi.delete(ids, article_owner)
def test_10_scroll(self):
    """Scroll results are pre-filtered to in-DOAJ and post-filtered of non-public data."""
    qsvc = QueryService()

    # three public articles and one withdrawn from DOAJ
    saved = []
    for _ in range(3):
        article = models.Article(**ArticleFixtureFactory.make_article_source(with_id=False))
        assert article.publisher_record_id() == 'some_identifier'
        article.save(blocking=True)
        saved.append(article)
    hidden = models.Article(**ArticleFixtureFactory.make_article_source(with_id=False, in_doaj=False))
    hidden.save(blocking=True)
    saved.append(hidden)

    # every scrolled hit must have had its publisher record id scrubbed
    query = {"query": {"match_all": {}}}
    for res in qsvc.scroll('api_query', 'article', query, None, None):
        record = models.Article(**res)
        assert record.publisher_record_id() is None, record.publisher_record_id()
def test_03_toc_uses_both_issns_when_available(self):
    """The ToC page should embed both the print and electronic ISSN when present."""
    journal = models.Journal(**JournalFixtureFactory.make_journal_source(in_doaj=True))
    pissn = journal.bibjson().first_pissn
    eissn = journal.bibjson().first_eissn
    journal.set_last_manual_update()
    journal.save(blocking=True)

    article = models.Article(**ArticleFixtureFactory.make_article_source(
        pissn=pissn, eissn=eissn, in_doaj=True))
    article.save(blocking=True)

    with self.app_test.test_client() as t_client:
        response = t_client.get('/toc/{}'.format(journal.bibjson().get_preferred_issn()))
        assert response.status_code == 200
        # both ISSNs should appear in the embedded javascript variable
        expected = 'var toc_issns = ["{pissn}","{eissn}"];'.format(pissn=pissn, eissn=eissn)
        assert expected in response.data
def test_01_create_articles_success(self):
    """Bulk create 10 articles and verify each lands in the index with a unique id.

    Fix: when the fixture lacked a doi/fulltext entry, the code appended a
    new dict but kept indexing with -1, relying on negative indexing
    happening to hit the appended element; the index is now set explicitly.
    """
    def find_dict_in_list(lst, key, value):
        # index of the first dict in lst with dic[key] == value, else -1
        for i, dic in enumerate(lst):
            if dic[key] == value:
                return i
        return -1

    # set up all the bits we need - 10 articles
    dataset = []
    for i in range(1, 11):
        data = ArticleFixtureFactory.make_incoming_api_article()
        # change the DOI and fulltext URLs to escape duplicate detection
        # and try with multiple articles
        doi_ix = find_dict_in_list(data['bibjson']['identifier'], 'type', 'doi')
        if doi_ix == -1:
            data['bibjson']['identifier'].append({"type" : "doi"})
            doi_ix = len(data['bibjson']['identifier']) - 1
        data['bibjson']['identifier'][doi_ix]['id'] = '10.0000/SOME.IDENTIFIER.{0}'.format(i)

        fulltext_url_ix = find_dict_in_list(data['bibjson']['link'], 'type', 'fulltext')
        if fulltext_url_ix == -1:
            data['bibjson']['link'].append({"type" : "fulltext"})
            fulltext_url_ix = len(data['bibjson']['link']) - 1
        data['bibjson']['link'][fulltext_url_ix]['url'] = 'http://www.example.com/article_{0}'.format(i)

        dataset.append(deepcopy(data))

    # create an account that we'll do the create as
    account = models.Account()
    account.set_id("test")
    account.set_name("Tester")
    account.set_email("*****@*****.**")

    # add a journal to the account
    journal = models.Journal(**JournalFixtureFactory.make_journal_source(in_doaj=True))
    journal.set_owner(account.id)
    journal.save()
    time.sleep(2)

    # call create on the object (which will save it to the index)
    ids = ArticlesBulkApi.create(dataset, account)

    # check that we got the right number of ids back
    assert len(ids) == 10
    assert len(list(set(ids))) == 10, len(list(set(ids)))  # are they actually 10 unique IDs?

    # let the index catch up
    time.sleep(2)

    # check that each id was actually created
    for id in ids:
        s = models.Article.pull(id)
        assert s is not None
def test_04_coerce(self):
    """Check the coercion behaviour of IncomingArticleDO fields."""
    def expect_failure(broken):
        # building the DO from broken data must raise
        with self.assertRaises(DataStructureException):
            IncomingArticleDO(broken)

    # first some successes
    data = ArticleFixtureFactory.make_article_source()
    data["bibjson"]["link"][0]["url"] = "http://www.example.com/this_location/here"  # protocol required
    data["bibjson"]["link"][0]["type"] = "fulltext"
    data["admin"]["in_doaj"] = False
    data["created_date"] = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
    ia = IncomingArticleDO(data)
    assert isinstance(ia.bibjson.title, unicode)

    # an invalid url, with and without protocol
    data = ArticleFixtureFactory.make_article_source()
    data["bibjson"]["link"][0]["url"] = "Two streets down on the left"
    expect_failure(data)
    data["bibjson"]["link"][0]["url"] = "www.example.com/this_location/here"
    expect_failure(data)

    # an invalid link type
    data = ArticleFixtureFactory.make_article_source()
    data["bibjson"]["link"][0]["type"] = "cheddar"
    expect_failure(data)

    # invalid bool
    data = ArticleFixtureFactory.make_article_source()
    data["admin"]["in_doaj"] = "Yes"
    expect_failure(data)

    # invalid date
    data = ArticleFixtureFactory.make_article_source()
    data["created_date"] = "Just yesterday"
    expect_failure(data)
def test_03_articles_crud(self):
    """Full CRUD round-trip for articles through the API.

    Fix: the final retrieve-after-delete hit /api/v1/applications/ instead
    of /api/v1/articles/, so the 404 never proved the article was removed.
    """
    # add some data to the index with a Create
    user_data = ArticleFixtureFactory.make_article_source()

    with self.app_test.test_client() as t_client:
        # log into the app as our user
        self.login(t_client, 'test', 'password123')

        # CREATE a new article
        response = t_client.post('/api/v1/articles?api_key=' + self.api_key,
                                 data=json.dumps(user_data))
        assert response.status_code == 201  # 201 "Created"
        assert response.mimetype == 'application/json'

        # Check it gives back a newly created article, with an ID
        new_ar_id = json.loads(response.data.decode("utf-8"))['id']
        new_ar_loc = json.loads(response.data.decode("utf-8"))['location']
        assert new_ar_id is not None
        assert new_ar_id in new_ar_loc

        # RETRIEVE the same article using the ID
        response = t_client.get('/api/v1/articles/{0}?api_key={1}'.format(new_ar_id, self.api_key))
        assert response.status_code == 200  # 200 "OK"
        assert response.mimetype == 'application/json'
        retrieved_article = json.loads(response.data.decode("utf-8"))
        new_ar_title = retrieved_article['bibjson']['title']
        assert new_ar_title == user_data['bibjson']['title']

        # UPDATE the title of the article
        updated_data = deepcopy(user_data)
        updated_data['bibjson']['title'] = 'This is a new title for this article'
        response = t_client.put('/api/v1/articles/{0}?api_key={1}'.format(new_ar_id, self.api_key),
                                data=json.dumps(updated_data))
        assert response.status_code == 204  # 204 "No Content"
        assert response.mimetype == 'application/json'

        response = t_client.get('/api/v1/articles/{0}?api_key={1}'.format(new_ar_id, self.api_key))
        retrieved_article = json.loads(response.data.decode("utf-8"))
        new_ar_title = retrieved_article['bibjson']['title']
        assert new_ar_title == updated_data['bibjson']['title']
        assert new_ar_title != user_data['bibjson']['title']

        # DELETE the article
        assert models.Article.pull(new_ar_id) is not None
        response = t_client.delete('/api/v1/articles/{0}?api_key={1}'.format(new_ar_id, self.api_key))
        assert response.status_code == 204  # 204 "No Content"
        assert response.mimetype == 'application/json'

        # Try to RETRIEVE the article again - check it isn't there anymore
        # (was /api/v1/applications/ - wrong endpoint for an article)
        response = t_client.get('/api/v1/articles/{0}?api_key={1}'.format(new_ar_id, self.api_key))
        assert response.status_code == 404
        assert response.mimetype == 'application/json'
def test_03_articles_crud(self):
    """Create, retrieve, update and delete an article through the API.

    Fix: the retrieve-after-delete used the /api/v1/applications/ endpoint
    rather than /api/v1/articles/, so the closing 404 check was vacuous.
    """
    # add some data to the index with a Create
    user_data = ArticleFixtureFactory.make_article_source()

    with self.app_test.test_client() as t_client:
        # log into the app as our user
        self.login(t_client, 'test', 'password123')

        # CREATE a new article
        response = t_client.post('/api/v1/articles?api_key=' + self.api_key,
                                 data=json.dumps(user_data))
        assert response.status_code == 201  # 201 "Created"
        assert response.mimetype == 'application/json'

        # Check it gives back a newly created article, with an ID
        new_ar_id = json.loads(response.data)['id']
        new_ar_loc = json.loads(response.data)['location']
        assert new_ar_id is not None
        assert new_ar_id in new_ar_loc

        # RETRIEVE the same article using the ID
        response = t_client.get('/api/v1/articles/{0}?api_key={1}'.format(new_ar_id, self.api_key))
        assert response.status_code == 200  # 200 "OK"
        assert response.mimetype == 'application/json'
        retrieved_article = json.loads(response.data)
        new_ar_title = retrieved_article['bibjson']['title']
        assert new_ar_title == user_data['bibjson']['title']

        # UPDATE the title of the article
        updated_data = deepcopy(user_data)
        updated_data['bibjson']['title'] = 'This is a new title for this article'
        response = t_client.put('/api/v1/articles/{0}?api_key={1}'.format(new_ar_id, self.api_key),
                                data=json.dumps(updated_data))
        assert response.status_code == 204  # 204 "No Content"
        assert response.mimetype == 'application/json'

        response = t_client.get('/api/v1/articles/{0}?api_key={1}'.format(new_ar_id, self.api_key))
        retrieved_article = json.loads(response.data)
        new_ar_title = retrieved_article['bibjson']['title']
        assert new_ar_title == updated_data['bibjson']['title']
        assert new_ar_title != user_data['bibjson']['title']

        # DELETE the article
        assert models.Article.pull(new_ar_id) is not None
        response = t_client.delete('/api/v1/articles/{0}?api_key={1}'.format(new_ar_id, self.api_key))
        assert response.status_code == 204  # 204 "No Content"
        assert response.mimetype == 'application/json'

        # Try to RETRIEVE the article again - check it isn't there anymore
        # (was /api/v1/applications/ - wrong endpoint for an article)
        response = t_client.get('/api/v1/articles/{0}?api_key={1}'.format(new_ar_id, self.api_key))
        assert response.status_code == 404
        assert response.mimetype == 'application/json'
def test_33_article_stats(self):
    """Journal.article_stats only counts in-DOAJ articles for its own ISSN."""
    articles = []

    def build(day, issn, in_doaj):
        # one article for the given issn/in_doaj flag, created on 2019-01-0<day>
        source = ArticleFixtureFactory.make_article_source(
            eissn=issn, pissn=issn, with_id=False, in_doaj=in_doaj)
        a = models.Article(**source)
        a.set_created("2019-01-0" + str(day) + "T00:00:00Z")
        articles.append(a)

    # variably in doaj/not in doaj, for/not for the issn we'll search
    for day in range(1, 3):
        build(day, "1111-1111", True)
    for day in range(3, 5):
        build(day, "1111-1111", False)
    for day in range(5, 7):
        build(day, "2222-2222", True)
    for day in range(7, 9):
        build(day, "2222-2222", False)

    # save all, blocking on the last so the index has settled before we query
    for i in range(len(articles)):
        articles[i].save(blocking=i == len(articles) - 1)

    journal = models.Journal()
    bibjson = journal.bibjson()
    bibjson.add_identifier(bibjson.P_ISSN, "1111-1111")

    stats = journal.article_stats()
    assert stats.get("total") == 2
    assert stats.get("latest") == "2019-01-02T00:00:00Z"
def test_03_create_article_fail(self):
    """Create must fail with no account (401) or with junk data (400)."""
    # a missing account is an authorisation failure
    with self.assertRaises(Api401Error):
        ArticlesCrudApi.create(ArticleFixtureFactory.make_article_source(), None)

    # junk data from a real account is a bad request
    with self.assertRaises(Api400Error):
        account = models.Account()
        account.set_id("test")
        account.set_name("Tester")
        account.set_email("*****@*****.**")
        ArticlesCrudApi.create({"some" : {"junk" : "data"}}, account)
def test_03_create_article_fail(self):
    """ArticlesCrudApi.create failure cases: anonymous and malformed."""
    # no account at all -> Api401Error
    with self.assertRaises(Api401Error):
        payload = ArticleFixtureFactory.make_article_source()
        ArticlesCrudApi.create(payload, None)

    # a valid account but structurally bust data -> Api400Error
    with self.assertRaises(Api400Error):
        requester = models.Account()
        requester.set_id("test")
        requester.set_name("Tester")
        requester.set_email("*****@*****.**")
        ArticlesCrudApi.create({"some": {"junk": "data"}}, requester)
def test_05_toc_correctly_uses_eissn(self):
    """When a journal has only an eISSN, the ToC should embed just that ISSN."""
    journal = models.Journal(**JournalFixtureFactory.make_journal_source(in_doaj=True))
    eissn = journal.bibjson().first_eissn
    # strip the print ISSN so only the electronic one remains
    journal.bibjson().remove_identifiers(idtype=journal.bibjson().P_ISSN,
                                         id=journal.bibjson().first_pissn)
    journal.set_last_manual_update()
    journal.save(blocking=True)

    article = models.Article(**ArticleFixtureFactory.make_article_source(pissn=eissn, in_doaj=True))
    article.save(blocking=True)

    with self.app_test.test_client() as t_client:
        response = t_client.get('/toc/{}'.format(journal.bibjson().get_preferred_issn()))
        assert response.status_code == 200
        # only the eISSN should appear in the embedded javascript variable
        assert 'var toc_issns = ["{eissn}"];'.format(eissn=eissn) in response.data
def test_05_delete_articles_fail(self):
    """Failure modes for bulk article delete: no auth, wrong owner, bad ids."""
    # set up all the bits we need
    dataset = []
    for n in range(10):
        dataset.append(ArticleFixtureFactory.make_incoming_api_article(
            doi="10.123/test/" + str(n), fulltext="http://example.com/" + str(n)))

    # the main account we're going to work as
    article_owner = models.Account()
    article_owner.set_id("test")
    article_owner.set_name("Tester")
    article_owner.set_email("*****@*****.**")

    # create another account which will own the articles so the one
    # above will be "another user" trying to delete our precious articles.
    somebody_else = models.Account()
    somebody_else.set_id("somebody_else")
    somebody_else.set_name("Somebody Else")
    somebody_else.set_email("*****@*****.**")

    # add a journal to the article owner account to create that link
    # between account and articles
    journal = models.Journal(**JournalFixtureFactory.make_journal_source(in_doaj=True))
    journal.set_owner(article_owner.id)
    journal.save()
    time.sleep(1)

    # create the articles, then let the index catch up
    ids = ArticlesBulkApi.create(dataset, article_owner)
    time.sleep(2)

    # without an account
    with self.assertRaises(Api401Error):
        ArticlesBulkApi.delete(ids, None)

    # with the wrong account
    article_owner.set_id("other")
    with self.assertRaises(Api400Error):
        ArticlesBulkApi.delete(ids, somebody_else)

    # on the wrong id - and the failure must repeat consistently
    ids.append("adfasdfhwefwef")
    article_owner.set_id("test")
    with self.assertRaises(Api400Error):
        ArticlesBulkApi.delete(ids, article_owner)
    with self.assertRaises(Api400Error):
        ArticlesBulkApi.delete(ids, article_owner)
def test_05_outgoing_article_do(self):
    """Outgoing article DO must omit internal-only data (index, history)."""
    # a blank DO constructs without complaint
    oa = OutgoingArticleDO()

    # build an article model from the fixture
    ap = models.Article(**ArticleFixtureFactory.make_article_source())
    # attach some arbitrary history - whatever it holds, it must not be exposed
    ap.add_history(bibjson={'Lorem': {'ipsum': 'dolor', 'sit': 'amet'},
                            'consectetur': 'adipiscing elit.'})

    oa = OutgoingArticleDO.from_model(ap)

    # internal-only sections must not leak into the outgoing representation
    assert oa.data.get("index") is None
    assert oa.data.get("history") is None
def test_01_article_index_date_parsing(self):
    """ The ToC date histogram needs an accurate datestamp in the article's index """
    article = models.Article(**ArticleFixtureFactory.make_article_source())
    bibjson = article.bibjson()

    # (year, month, expected stamp) cases exercising the date parser.
    # NB the two-digit-year cases will give a false negative 70 years from
    # the time of writing; this gives adequate warning (24 years) to fix
    # hard-coding of centuries in get_publication_date().
    cases = [
        ('12', '03', '2012-03-01T00:00:00Z'),           # shortened year
        ('86', '11', '1986-11-01T00:00:00Z'),           # shortened year, last century
        ('2001', '03', '2001-03-01T00:00:00Z'),         # numeric month
        ('2001', 'March', '2001-03-01T00:00:00Z'),      # full month name
        ('2006', 'nOVeMBer', '2006-11-01T00:00:00Z'),   # mixed-case month
        ('1993', 'Dec', '1993-12-01T00:00:00Z'),        # abbreviated month
        ('1999', 'Flibble', '1999-01-01T00:00:00Z'),    # bogus month falls back to Jan
    ]
    for year, month, expected in cases:
        bibjson.year = year
        bibjson.month = month
        assert bibjson.get_publication_date() == expected
def setUp(self):
    """Create journals each with attached articles, then a managing-editor context."""
    super(TestTaskJournalBulkDelete, self).setUp()

    ArticleBulkDeleteBackgroundTask.BATCH_SIZE = 13

    self.journals = []
    self.articles = []
    for journal_source in JournalFixtureFactory.make_many_journal_sources(count=TEST_JOURNAL_COUNT):
        journal = models.Journal(**journal_source)
        self.journals.append(journal)
        journal.save()
        # attach a handful of articles to each journal via its ISSNs
        for _ in range(TEST_ARTICLES_PER_JOURNAL):
            article = models.Article(**ArticleFixtureFactory.make_article_source(
                with_id=False,
                eissn=journal.bibjson().first_eissn,
                pissn=journal.bibjson().first_pissn))
            article.save()
            self.articles.append(article)

    # give the index time to absorb the saves
    sleep(2)

    self._make_and_push_test_context(
        acc=models.Account(**AccountFixtureFactory.make_managing_editor_source()))
def test_30_article_journal_sync(self):
    """add_journal_metadata copies journal metadata onto the article exactly once."""
    journal = models.Journal(**JournalFixtureFactory.make_journal_source(in_doaj=True))
    article = models.Article(**ArticleFixtureFactory.make_article_source(
        in_doaj=False, with_journal_info=False))

    # before the sync the article carries none of the journal's metadata
    assert article.has_seal() is False
    assert article.bibjson().journal_issns != journal.bibjson().issns()

    reg = models.Journal()
    assert article.add_journal_metadata(journal, reg) is True

    # the journal's state and bibliographic details have been copied over
    assert article.has_seal() is True
    assert article.is_in_doaj() is True
    assert article.bibjson().journal_issns == journal.bibjson().issns()
    assert article.bibjson().publisher == journal.bibjson().publisher
    assert article.bibjson().journal_country == journal.bibjson().country
    assert article.bibjson().journal_language == journal.bibjson().language
    assert article.bibjson().journal_title == journal.bibjson().title

    # a second sync is a no-op
    assert article.add_journal_metadata(journal) is False
def test_06_retrieve_article_success(self):
    """Retrieve an article via the CRUD API and check the outgoing journal metadata.

    Fix: the failure message on the publisher assertion called a.bibjson() -
    but on an OutgoingArticleDO bibjson is a data attribute, not a method,
    so a failing assert would have raised TypeError instead of reporting
    the offending value.
    """
    # set up all the bits we need: an account plus a journal it owns
    account = models.Account()
    account.set_id('test')
    account.set_name("Tester")
    account.set_email("*****@*****.**")
    journal = models.Journal(**JournalFixtureFactory.make_journal_source(in_doaj=True))
    journal.set_owner(account.id)
    journal.save()
    time.sleep(1)

    data = ArticleFixtureFactory.make_article_source()
    ap = models.Article(**data)
    ap.save()
    time.sleep(1)

    # call retrieve on the object with a valid user
    a = ArticlesCrudApi.retrieve(ap.id, account)

    # call retrieve with no user (will return if in_doaj is True)
    a = ArticlesCrudApi.retrieve(ap.id, None)

    # check that we got back the object we expected
    assert isinstance(a, OutgoingArticleDO)
    assert a.id == ap.id
    assert a.bibjson.journal.start_page == '3', a.bibjson.journal.start_page
    assert a.bibjson.journal.end_page == '21'
    assert a.bibjson.journal.volume == '1'
    assert a.bibjson.journal.number == '99'
    assert a.bibjson.journal.publisher == 'The Publisher', a.bibjson.journal.publisher
    assert a.bibjson.journal.title == 'The Title'
    assert a.bibjson.journal.license[0].title == "CC BY"
    assert a.bibjson.journal.license[0].type == "CC BY"
    assert a.bibjson.journal.license[0].url == "http://license.example.com"
    assert a.bibjson.journal.license[0].version == "1.0"
    assert a.bibjson.journal.license[0].open_access == True
    assert a.bibjson.journal.language == ["EN", "FR"]
    assert a.bibjson.journal.country == "US"
def test_04_delete_article_success(self):
    """Bulk delete removes exactly the requested articles and no others."""
    # ten distinct articles to create
    dataset = [ArticleFixtureFactory.make_incoming_api_article(
        doi="10.123/test/" + str(i), fulltext="http://example.com/" + str(i))
        for i in range(10)]

    # the account we work as, with a journal linking it to the articles
    account = models.Account()
    account.set_id("test")
    account.set_name("Tester")
    account.set_email("*****@*****.**")
    journal = models.Journal(**JournalFixtureFactory.make_journal_source(in_doaj=True))
    journal.set_owner(account.id)
    journal.save()
    time.sleep(1)

    ids = ArticlesBulkApi.create(dataset, account)
    time.sleep(2)  # let the index catch up

    # delete the first half and let the index settle again
    dels = ids[:5]
    ArticlesBulkApi.delete(dels, account)
    time.sleep(2)

    # the deleted half is gone; the rest survive
    for article_id in dels:
        assert models.Article.pull(article_id) is None
    for article_id in ids[5:]:
        assert models.Article.pull(article_id) is not None
def test_public_data_dump(self, name, kwargs):
    """Matrix test for the PublicDataDump background task.

    `kwargs` comes from a parameterised test matrix; each string argument
    selects a branch below (counts, type filter, clean/prune flags and
    simulated store-write failures), and `status_arg` is the job status
    the run is expected to end in.
    """
    clean_arg = kwargs.get("clean")
    prune_arg = kwargs.get("prune")
    types_arg = kwargs.get("types")
    journals_arg = kwargs.get("journals")
    articles_arg = kwargs.get("articles")
    batch_size_arg = kwargs.get("batch_size")
    tmp_write_arg = kwargs.get("tmp_write")
    store_write_arg = kwargs.get("store_write")
    status_arg = kwargs.get("status")

    ###############################################
    ## set up

    # tri-state flags: "yes" -> True, "no" -> False, anything else -> None
    clean = True if clean_arg == "yes" else False if clean_arg == "no" else None
    prune = True if prune_arg == "yes" else False if prune_arg == "no" else None
    types = types_arg if types_arg != "-" else None

    journal_count = int(journals_arg)
    article_count = int(articles_arg)
    batch_size = int(batch_size_arg)
    # NOTE(review): "/" here looks like Python 2 integer division - under
    # Python 3 these would be floats and the len() comparisons below would
    # fail. Confirm interpreter version before porting.
    journal_file_count = 0 if journal_count == 0 else (journal_count / batch_size) + 1
    article_file_count = 0 if article_count == 0 else (article_count / batch_size) + 1
    # number of records expected in the first file of each dump
    first_article_file_records = 0 if article_count == 0 else batch_size if article_count > batch_size else article_count
    first_journal_file_records = 0 if journal_count == 0 else batch_size if journal_count > batch_size else journal_count

    # add the data to the index first, to maximise the time it has to become available for search
    sources = JournalFixtureFactory.make_many_journal_sources(journal_count, in_doaj=True)
    jids = []
    for i in range(len(sources)):
        source = sources[i]
        journal = models.Journal(**source)
        journal.save()
        jids.append((journal.id, journal.last_updated))

    aids = []
    for i in range(article_count):
        # distinct ISSNs/DOIs/fulltexts per article to avoid duplicate detection
        source = ArticleFixtureFactory.make_article_source(
            eissn="{x}000-0000".format(x=i),
            pissn="0000-{x}000".format(x=i),
            with_id=False,
            doi="10.123/{x}".format(x=i),
            fulltext="http://example.com/{x}".format(x=i)
        )
        article = models.Article(**source)
        article.save()
        aids.append((article.id, article.last_updated))

    # construct some test data in the local store
    container_id = app.config["STORE_PUBLIC_DATA_DUMP_CONTAINER"]
    localStore = store.StoreLocal(None)
    localStoreFiles = []
    if clean or prune:
        # pre-seed old dump files so clean/prune has something to act on
        for i in range(5):
            localStore.store(container_id, "doaj_article_data_2018-01-0" + str(i) + ".tar.gz",
                             source_stream=StringIO("test"))
            localStore.store(container_id, "doaj_journal_data_2018-01-0" + str(i) + ".tar.gz",
                             source_stream=StringIO("test"))
        localStoreFiles = localStore.list(container_id)

    app.config["DISCOVERY_RECORDS_PER_FILE"] = batch_size

    # set the mocks for store write failures
    if tmp_write_arg == "fail":
        app.config["STORE_TMP_IMPL"] = StoreMockFactory.no_writes_classpath()

    if store_write_arg == "fail":
        app.config["STORE_IMPL"] = StoreMockFactory.no_writes_classpath()

    # block until all the records are saved
    for jid, lu in jids:
        models.Journal.block(jid, lu, sleep=0.05)
    for aid, lu in aids:
        models.Article.block(aid, lu, sleep=0.05)

    ###########################################################
    # Execution

    job = PublicDataDumpBackgroundTask.prepare("testuser", clean=clean, prune=prune, types=types)
    task = PublicDataDumpBackgroundTask(job)
    BackgroundApi.execute(task)

    # make sure we have a fresh copy of the job
    job = task.background_job
    assert job.status == status_arg

    if job.status != "error":
        # success path: cache URLs present only for the requested types
        article_url = models.cache.Cache.get_public_data_dump().get("article", {}).get("url")
        if types_arg in ["-", "all", "article"]:
            assert article_url is not None
        else:
            assert article_url is None

        journal_url = models.cache.Cache.get_public_data_dump().get("journal", {}).get("url")
        if types_arg in ["-", "all", "journal"]:
            assert journal_url is not None
        else:
            assert journal_url is None

        assert localStore.exists(container_id)
        files = localStore.list(container_id)

        # one tarball per requested type
        if types_arg in ["-", "all"]:
            assert len(files) == 2
        else:
            assert len(files) == 1

        day_at_start = dates.today()

        if types_arg in ["-", "all", "article"]:
            # the article tarball has the expected member count, record
            # count in the first member, and only public keys per record
            article_file = "doaj_article_data_" + day_at_start + ".tar.gz"
            assert article_file in files

            stream = localStore.get(container_id, article_file)
            tarball = tarfile.open(fileobj=stream, mode="r:gz")
            members = tarball.getmembers()
            assert len(members) == article_file_count

            if len(members) > 0:
                f = tarball.extractfile(members[0])
                data = json.loads(f.read())
                assert len(data) == first_article_file_records
                record = data[0]
                for key in record.keys():
                    assert key in ["admin", "bibjson", "id", "last_updated", "created_date"]
                if "admin" in record:
                    for key in record["admin"].keys():
                        assert key in ["ticked", "seal"]

        if types_arg in ["-", "all", "journal"]:
            # same checks for the journal tarball
            journal_file = "doaj_journal_data_" + day_at_start + ".tar.gz"
            assert journal_file in files

            stream = localStore.get(container_id, journal_file)
            tarball = tarfile.open(fileobj=stream, mode="r:gz")
            members = tarball.getmembers()
            assert len(members) == journal_file_count

            if len(members) > 0:
                f = tarball.extractfile(members[0])
                data = json.loads(f.read())
                assert len(data) == first_journal_file_records
                record = data[0]
                for key in record.keys():
                    assert key in ["admin", "bibjson", "id", "last_updated", "created_date"]
                if "admin" in record:
                    for key in record["admin"].keys():
                        assert key in ["ticked", "seal"]
    else:
        # in the case of an error, we expect the tmp store to have been cleaned up
        tmpStore = store.TempStore()
        assert not tmpStore.exists(container_id)

        # in the case of an error, we expect the main store not to have been touched
        # (for the errors that we are checking for)
        if prune and not clean:
            # no matter what the error, if we didn't specify clean then we expect everything
            # to survive
            survived = localStore.list(container_id)
            assert localStoreFiles == survived
        elif clean:
            # if we specified clean, then it's possible the main store was cleaned before the
            # error occurred, in which case it depends on the error. This reminds us that
            # clean shouldn't be used in production
            if tmp_write_arg == "fail":
                assert not localStore.exists(container_id)
            else:
                survived = localStore.list(container_id)
                assert localStoreFiles == survived
        else:
            # otherwise, we expect the main store to have survived
            assert not localStore.exists(container_id)
def test_01_batch_create_article(self, name, kwargs):
    """Matrix-driven test of the article BLL's batch_create_articles.

    Each row of the parameter matrix arrives as string-valued ``kwargs``;
    this method decodes them, builds the requested batch of article
    fixtures, wires mock implementations onto ``self.svc``, then runs
    batch_create_articles and asserts either the expected exception or the
    expected success/fail/update/new counts in the returned report.

    :param name: human-readable name of the matrix row (unused in the body)
    :param kwargs: dict of string arguments describing the scenario
    """
    # ~~ decode the raw matrix parameters (all arrive as strings) ~~
    articles_arg = kwargs.get("articles")
    duplicate_in_batch_arg = kwargs.get("duplicate_in_batch")
    duplicate_in_index_arg = kwargs.get("duplicate_in_index")
    account_arg = kwargs.get("account")
    duplicate_check_arg = kwargs.get("duplicate_check")
    merge_duplicate_arg = kwargs.get("merge_duplicate")
    limit_to_account_arg = kwargs.get("limit_to_account")
    add_journal_info_arg = kwargs.get("add_journal_info")
    raises_arg = kwargs.get("raises")
    success_arg = kwargs.get("success")
    fail_arg = kwargs.get("fail")
    update_arg = kwargs.get("update")

    ###############################################
    ## set up

    # expected outcome counts for the report returned by the service
    success = int(success_arg)
    fail = int(fail_arg)
    update = int(update_arg)

    duplicate_in_batch = duplicate_in_batch_arg == "yes"
    # 0 = no index duplicate, 1 = mergeable duplicate, 2 = merge conflict
    # (interpretation inferred from the gd_mock branches below)
    duplicate_in_index = int(duplicate_in_index_arg)

    # EXCEPTIONS maps the matrix's string name to an exception class;
    # None means this row expects a normal (non-raising) run
    raises = EXCEPTIONS.get(raises_arg)

    # tri-state flags: "none" -> None (service default), otherwise bool
    duplicate_check = None
    if duplicate_check_arg != "none":
        duplicate_check = True if duplicate_check_arg == "true" else False

    merge_duplicate = None
    if merge_duplicate_arg != "none":
        merge_duplicate = True if merge_duplicate_arg == "true" else False

    limit_to_account = None
    if limit_to_account_arg != "none":
        limit_to_account = True if limit_to_account_arg == "true" else False

    add_journal_info = None
    if add_journal_info_arg != "none":
        add_journal_info = True if add_journal_info_arg == "true" else False

    # a publisher account, when the scenario calls for one
    account = None
    if account_arg != "none":
        source = AccountFixtureFactory.make_publisher_source()
        account = Account(**source)

    # journal_specs feeds the get_journal mock when add_journal_info is on;
    # last_* record properties of the final base article, used to configure
    # the ownership and duplicate mocks below
    journal_specs = []
    last_doi = None
    last_ft = None
    last_issn = None
    last_id = None

    articles = None
    if articles_arg != "none":
        articles = []
        if articles_arg == "yes":
            # Build 4 base articles with distinct ISSNs: two DOI-only,
            # two fulltext-only.  The journal info is stripped from each
            # fixture so add_journal_info has something to do.

            # one with a DOI and no fulltext
            source = ArticleFixtureFactory.make_article_source(
                eissn="0000-0000",
                pissn="0000-0000",
                doi="10.123/abc/0",
                fulltext=False
            )
            del source["bibjson"]["journal"]
            article = Article(**source)
            article.set_id()
            articles.append(article)
            if add_journal_info:
                journal_specs.append({"title" : "0", "pissn" : "0000-0000", "eissn" : "0000-0000"})

            # another with a DOI and no fulltext
            source = ArticleFixtureFactory.make_article_source(
                eissn="1111-1111",
                pissn="1111-1111",
                doi="10.123/abc/1",
                fulltext=False
            )
            del source["bibjson"]["journal"]
            article = Article(**source)
            article.set_id()
            articles.append(article)
            if add_journal_info:
                journal_specs.append({"title" : "1", "pissn" : "1111-1111", "eissn" : "1111-1111"})

            # one with a fulltext and no DOI
            source = ArticleFixtureFactory.make_article_source(
                eissn="2222-2222",
                pissn="2222-2222",
                fulltext="http://example.com/2",
                doi=False
            )
            del source["bibjson"]["journal"]
            article = Article(**source)
            article.set_id()
            articles.append(article)
            if add_journal_info:
                journal_specs.append({"title" : "2", "pissn" : "2222-2222", "eissn" : "2222-2222"})

            # another one with a fulltext and no DOI
            source = ArticleFixtureFactory.make_article_source(
                eissn="3333-3333",
                pissn="3333-3333",
                fulltext="http://example.com/3",
                doi=False
            )
            del source["bibjson"]["journal"]
            article = Article(**source)
            article.set_id()
            articles.append(article)
            if add_journal_info:
                journal_specs.append({"title" : "3", "pissn" : "3333-3333", "eissn" : "3333-3333"})

            # remember identifying fields for the mock configuration
            # (note: last_doi/last_ft refer to earlier articles in the
            # batch, not necessarily the final one)
            last_issn = "3333-3333"
            last_doi = "10.123/abc/1"
            last_ft = "http://example.com/3"
            last_id = articles[-1].id

            if duplicate_in_batch:
                # Append 2 more articles that collide with earlier
                # batch members on DOI and fulltext respectively.

                # one with a duplicated DOI
                source = ArticleFixtureFactory.make_article_source(
                    eissn="4444-4444",
                    pissn="4444-4444",
                    doi="10.123/abc/0",
                    fulltext="http://example.com/4"
                )
                del source["bibjson"]["journal"]
                article = Article(**source)
                article.set_id()
                articles.append(article)
                if add_journal_info:
                    journal_specs.append({"title" : "4", "pissn" : "4444-4444", "eissn" : "4444-4444"})

                # one with a duplicated Fulltext
                source = ArticleFixtureFactory.make_article_source(
                    eissn="5555-5555",
                    pissn="5555-5555",
                    doi="10.123/abc/5",
                    fulltext="http://example.com/1"
                )
                del source["bibjson"]["journal"]
                article = Article(**source)
                article.set_id()
                articles.append(article)
                if add_journal_info:
                    journal_specs.append({"title" : "5", "pissn" : "5555-5555", "eissn" : "5555-5555"})

    # ~~ patch the service with mock collaborators ~~

    # ownership check: full owner, owner of only the last ISSN, or not an owner
    ilo_mock = None
    if account_arg == "owner":
        ilo_mock = BLLArticleMockFactory.is_legitimate_owner(legit=True)
    elif account_arg == "own_1":
        ilo_mock = BLLArticleMockFactory.is_legitimate_owner(legit_on_issn=[last_issn])
    else:
        ilo_mock = BLLArticleMockFactory.is_legitimate_owner()
    self.svc.is_legitimate_owner = ilo_mock

    # duplicate detection in the index: a concrete duplicate, a merge
    # conflict, or nothing found
    gd_mock = None
    if duplicate_in_index == 1:
        gd_mock = BLLArticleMockFactory.get_duplicate(given_article_id=last_id, eissn=last_issn, pissn=last_issn, doi=last_doi, fulltext=last_ft)
    elif duplicate_in_index == 2:
        gd_mock = BLLArticleMockFactory.get_duplicate(merge_conflict=True)
    else:
        gd_mock = BLLArticleMockFactory.get_duplicate(return_none=True)
    self.svc.get_duplicate = gd_mock

    # ownership status: empty in all four categories for every scenario
    ios_mock = BLLArticleMockFactory.issn_ownership_status([], [], [], [])
    self.svc.issn_ownership_status = ios_mock

    if add_journal_info:
        # NOTE: patches the Article class attribute directly; presumably
        # restored by the test harness between runs — confirm in setUp/tearDown
        gj_mock = ModelArticleMockFactory.get_journal(journal_specs)
        Article.get_journal = gj_mock

    ###########################################################
    # Execution

    if raises is not None:
        # Expect the call to raise.  For IngestException rows (except the
        # merge-conflict case) also verify the partial report attached to
        # the exception, then re-raise so assertRaises still sees it.
        with self.assertRaises(raises):
            try:
                self.svc.batch_create_articles(articles, account, duplicate_check, merge_duplicate, limit_to_account, add_journal_info)
            except exceptions.IngestException as e:
                if duplicate_in_index != 2:
                    report = e.result
                    assert report["success"] == success
                    assert report["fail"] == fail
                    assert report["update"] == update
                    assert report["new"] == success - update
                raise
    else:
        report = self.svc.batch_create_articles(articles, account, duplicate_check, merge_duplicate, limit_to_account, add_journal_info)

        # make sure all the articles are saved before running the asserts
        aids = [(a.id, a.last_updated) for a in articles]
        for aid, lu in aids:
            Article.block(aid, lu, sleep=0.05)

        assert report["success"] == success
        assert report["fail"] == fail
        assert report["update"] == update
        assert report["new"] == success - update

        if success > 0:
            all_articles = Article.all()
            if len(all_articles) != success:
                # index may still be refreshing; retry once after a short wait
                time.sleep(0.5)
                all_articles = Article.all()
            assert len(all_articles) == success
            for article in all_articles:
                # journal metadata is present iff add_journal_info was requested
                if add_journal_info:
                    assert article.bibjson().journal_title is not None
                else:
                    assert article.bibjson().journal_title is None
        else:
            # there's nothing in the article index
            with self.assertRaises(ESMappingMissingError):
                Article.all()