def test_06(self):
    id1 = uuid.uuid4().hex
    id2 = uuid.uuid4().hex

    a = models.Article()
    a.set_id(id1)
    b = a.bibjson()
    b.title = "Example A article with a fulltext url"
    b.abstract = "a bunch of text"
    b.add_url("http://www.sbe.deu.edu.tr/dergi/cilt15.say%C4%B12/06%20AKALIN.pdf", urltype="fulltext")

    a2 = models.Article()
    a2.set_id(id2)
    b2 = a2.bibjson()
    b2.title = "Example B article with a fulltext url"
    b2.add_url("http://www.sbe.deu.edu.tr/dergi/cilt15.say%C4%B12/06%20AKALIN.pdf", urltype="fulltext")

    assert a2.id == id2
    a2.merge(a)
    assert a2.id == id1, (a2.id, id1, id2)
    assert a2.bibjson().title == "Example B article with a fulltext url"
    assert a2.bibjson().abstract is None
def test_02(self):
    # make ourselves an example article
    a = models.Article()
    b = a.bibjson()
    b.title = "Example 2 article with a fulltext url"
    b.add_url("http://www.sbe.deu.edu.tr/dergi/cilt15.say%C4%B12/06%20AKALIN.pdf", urltype="fulltext")
    a.save()

    # pause to allow the index time to catch up
    time.sleep(2)

    # create a replacement article
    z = models.Article()
    y = z.bibjson()
    y.title = "Replacement article for fulltext url"
    y.add_url("http://www.sbe.deu.edu.tr/dergi/cilt15.say%C4%B12/06%20AKALIN.pdf", urltype="fulltext")

    # get the xwalk to determine if there is a duplicate
    xwalk = article.XWalk()
    d = xwalk.get_duplicate(z)

    assert d is not None
    assert d.bibjson().title == "Example 2 article with a fulltext url"
def test_07_both_duplication_criteria(self):
    """Check that an article is only reported once if it is duplicated by both DOI and fulltext URL"""
    # make ourselves an example article
    ftu = "http://www.sbe.deu.edu.tr/dergi/cilt15.say%C4%B12/06%20AKALIN.pdf"
    doi = "10.doi/123"
    a = models.Article()
    b = a.bibjson()
    b.title = "Example article with a fulltext url and a DOI"
    b.add_url(ftu, urltype="fulltext")
    b.add_identifier('doi', doi)
    a.save(blocking=True)

    # create another article
    z = models.Article()
    y = z.bibjson()
    y.title = "Replacement article for fulltext url and a DOI"
    y.add_url(ftu, urltype="fulltext")
    y.add_identifier('doi', doi)

    # determine if there's a duplicate
    articleService = DOAJ.articleService()
    d = articleService.get_duplicates(z)

    # the article matches on both criteria, but should be reported only once
    assert len(d) == 1
    print(len(d))
    assert d[0].bibjson().title == "Example article with a fulltext url and a DOI"
def to_article_model(self, existing=None):
    dat = deepcopy(self.data)

    # move the page numbers from the journal object up to the root of the bibjson
    if "journal" in dat["bibjson"] and "start_page" in dat["bibjson"].get("journal", {}):
        dat["bibjson"]["start_page"] = dat["bibjson"]["journal"]["start_page"]
        del dat["bibjson"]["journal"]["start_page"]
    if "journal" in dat["bibjson"] and "end_page" in dat["bibjson"].get("journal", {}):
        dat["bibjson"]["end_page"] = dat["bibjson"]["journal"]["end_page"]
        del dat["bibjson"]["journal"]["end_page"]

    # clear out fields that we don't accept via the API
    if "admin" in dat and "in_doaj" in dat["admin"]:
        del dat["admin"]["in_doaj"]
    if "admin" in dat and "seal" in dat["admin"]:
        del dat["admin"]["seal"]
    if "admin" in dat and "upload_id" in dat["admin"]:
        del dat["admin"]["upload_id"]

    if existing is None:
        return models.Article(**dat)
    else:
        merged = dataobj.merge_outside_construct(self._struct, dat, existing.data)
        return models.Article(**merged)
def test_01_same_fulltext(self):
    """Check duplication detection on articles with the same fulltext URL"""
    # A list of various URLs to check matching on
    ftus = [
        "http://examplejournal.telfor.rs/Published/Vol1No1/Vol1No1_A5.pdf",
        "http://www.sbe.deu.edu.tr/dergi/cilt15.say%C4%B12/06%20AKALIN.pdf",
        "http://www.ujcem.med.sumdu.edu.ua/images/sampledata/2013/4/408_412_IV-020.pdf",
        "http://www.psychologie-aktuell.com/fileadmin/download/ptam/1-2014_20140324/01_Geiser.pdf"
    ]

    for ftu in ftus:
        # make ourselves an example article
        a = models.Article()
        b = a.bibjson()
        b.title = "Example article with a fulltext url"
        b.add_url(ftu, urltype="fulltext")
        a.save(blocking=True)

        # create a replacement article
        z = models.Article()
        y = z.bibjson()
        y.title = "Replacement article for fulltext url"
        y.add_url(ftu, urltype="fulltext")

        # determine if there's a duplicate
        articleService = DOAJ.articleService()
        d = articleService.get_duplicate(z)

        assert d is not None
        assert d.bibjson().title == "Example article with a fulltext url"
def test_03(self):
    # make ourselves an example article
    a = models.Article()
    b = a.bibjson()
    b.title = "Example 2 article with a fulltext url"
    b.add_url("http://www.ujcem.med.sumdu.edu.ua/images/sampledata/2013/4/408_412_IV-020.pdf", urltype="fulltext")
    a.save()

    # pause to allow the index time to catch up
    time.sleep(2)

    # create a replacement article
    z = models.Article()
    y = z.bibjson()
    y.title = "Replacement article for fulltext url"
    y.add_url("http://www.ujcem.med.sumdu.edu.ua/images/sampledata/2013/4/408_412_IV-020.pdf", urltype="fulltext")

    # get the xwalk to determine if there is a duplicate
    xwalk = article.XWalk()
    d = xwalk.get_duplicate(z)

    assert d is not None
    assert d.bibjson().title == "Example 2 article with a fulltext url"
def test_04(self):
    # make ourselves an example article
    a = models.Article()
    b = a.bibjson()
    b.title = "Example 2 article with a fulltext url"
    b.add_url("http://www.psychologie-aktuell.com/fileadmin/download/ptam/1-2014_20140324/01_Geiser.pdf", urltype="fulltext")
    a.save()

    # pause to allow the index time to catch up
    time.sleep(2)

    # create a replacement article
    z = models.Article()
    y = z.bibjson()
    y.title = "Replacement article for fulltext url"
    y.add_url("http://www.psychologie-aktuell.com/fileadmin/download/ptam/1-2014_20140324/01_Geiser.pdf", urltype="fulltext")

    # get the xwalk to determine if there is a duplicate
    xwalk = article.XWalk()
    d = xwalk.get_duplicate(z)

    assert d is not None
    assert d.bibjson().title == "Example 2 article with a fulltext url"
def test_01_duplicates_report(self):
    """Check duplication reporting across all articles in the index"""

    # Create 2 identical articles, a duplicate pair
    article1 = models.Article(**ArticleFixtureFactory.make_article_source(
        eissn='1111-1111', pissn='2222-2222', with_id=False, in_doaj=True, with_journal_info=True))
    a1_doi = article1.bibjson().get_identifiers('doi')
    assert a1_doi is not None
    article1.save(blocking=True)

    time.sleep(1)

    article2 = models.Article(**ArticleFixtureFactory.make_article_source(
        eissn='1111-1111', pissn='2222-2222', with_id=False, in_doaj=True, with_journal_info=True))
    a2_doi = article2.bibjson().get_identifiers('doi')
    assert a2_doi == a1_doi
    article2.save(blocking=True)

    # Run the reporting task
    user = app.config.get("SYSTEM_USERNAME")
    job = article_duplicate_report.ArticleDuplicateReportBackgroundTask.prepare(user, outdir=TMP_DIR)
    task = article_duplicate_report.ArticleDuplicateReportBackgroundTask(job)
    task.run()

    # The audit log should show we saved the reports to the TMP_DIR defined above
    audit_1 = job.audit.pop(0)
    assert audit_1.get('message', '').endswith(TMP_DIR)
    assert os.path.exists(TMP_DIR + '/duplicate_articles_global_' + dates.today() + '.csv')

    # It should also clean up its interim article csv
    assert not os.path.exists(paths.rel2abs(__file__, 'tmp_article_duplicate_report'))

    # The duplicates should be detected and appear in the report and audit summary count
    with open(TMP_DIR + '/duplicate_articles_global_' + dates.today() + '.csv') as f:
        csvlines = f.readlines()
        # We expect one result line + headings: our newest article has 1 duplicate
        res = csvlines.pop()
        assert res.startswith(article2.id)  # The newest comes first, so article1 is article2's duplicate.
        assert article1.id in res
        assert 'doi+fulltext' in res

    audit_2 = job.audit.pop(0)
    assert audit_2.get('message', '') == '2 articles processed for duplicates. 1 global duplicate sets found.'
def test_05_full_doi(self):
    """Test that we still detect duplicate DOIs when we have the full URI, not just the bare 10.xxx identifier."""
    # make ourselves a couple of example articles
    a = models.Article()
    b = a.bibjson()
    b.title = "Example A article with a DOI"
    b.add_identifier('doi', "https://doi.org/10.doi/123")
    a.save(blocking=True)

    # create an article which should not be caught by the duplicate detection
    not_duplicate = models.Article()
    not_duplicate_bibjson = not_duplicate.bibjson()
    not_duplicate_bibjson.title = "Example C article with a DOI"
    not_duplicate_bibjson.add_identifier('doi', "https://doi.org/10.doi/DIFFERENT")
    not_duplicate.save(blocking=True)

    # create a replacement article
    z = models.Article()
    y = z.bibjson()
    y.title = "Replacement article for DOI"
    y.add_identifier('doi', "http://doi.org/10.doi/123")

    # determine if there's a duplicate
    articleService = DOAJ.articleService()
    dups = articleService.get_duplicates(z)
    assert len(dups) == 1

    # Check when we ask for one duplicate we get the most recent duplicate.
    d = articleService.get_duplicate(z)
    assert d is not None
    assert d.bibjson().title == "Example A article with a DOI", d.bibjson().title
def test_04_with_doi_instead(self):
    """Detect a duplicate using the DOI field."""
    # make ourselves a couple of example articles
    a = models.Article()
    b = a.bibjson()
    b.title = "Example A article with a DOI"
    b.add_identifier('doi', "10.doi/123")
    a.save(blocking=True)

    # create an article which should not be caught by the duplicate detection
    not_duplicate = models.Article()
    not_duplicate_bibjson = not_duplicate.bibjson()
    not_duplicate_bibjson.title = "Example C article with a DOI"
    not_duplicate_bibjson.add_identifier('doi', "10.doi/DIFFERENT")
    not_duplicate.save(blocking=True)

    # create a replacement article
    z = models.Article()
    y = z.bibjson()
    y.title = "Replacement article for DOI"
    y.add_identifier('doi', "10.doi/123")

    # determine if there's a duplicate
    articleService = DOAJ.articleService()
    dups = articleService.get_duplicates(z)
    assert len(dups) == 1

    # Check when we ask for one duplicate we get the most recent duplicate.
    d = articleService.get_duplicate(z)
    assert d is not None
    assert d.bibjson().title == "Example A article with a DOI", d.bibjson().title
def test_09_search(self):
    # Just bringing it all together. Make 4 articles: 3 in DOAJ, 1 not in DOAJ.
    # We then expect pre-filters to run on the query, ensuring we only get the 3 in DOAJ articles.
    # We also expect the post-filters to run on the results, ensuring non-public data is deleted from the admin section.
    qsvc = QueryService()

    articles = []
    for i in range(0, 3):
        articles.append(models.Article(**ArticleFixtureFactory.make_article_source(with_id=False)))
        assert articles[-1].publisher_record_id() == 'some_identifier'
        articles[-1].save(blocking=True)
    articles.append(models.Article(**ArticleFixtureFactory.make_article_source(with_id=False, in_doaj=False)))
    articles[-1].save(blocking=True)

    res = qsvc.search('query', 'article', {"query": {"match_all": {}}}, account=None, additional_parameters={})
    assert res['hits']['total'] == 3, res['hits']['total']

    for hit in res['hits']['hits']:
        am = models.Article(**hit)
        assert am.publisher_record_id() is None, am.publisher_record_id()
def test_02_merge(self):
    # make ourselves an example article
    a = models.Article()
    b = a.bibjson()
    b.title = "Example 2 article with a fulltext url"
    b.add_url("http://examplejournal.telfor.rs/Published/Vol1No1/Vol1No1_A5.pdf", urltype="fulltext")
    a.save()

    # create a replacement article
    z = models.Article()
    y = z.bibjson()
    y.title = "Replacement article for fulltext url"
    y.add_url("http://examplejournal.telfor.rs/Published/Vol1No1/Vol1No1_A5.pdf", urltype="fulltext")

    # do a merge
    z.merge(a)

    # let the index catch up, then we can check this worked
    time.sleep(2)

    hist = models.ArticleHistory.get_history_for(a.id)
    print(hist)
    print(len(hist))
    assert len(hist) == 1
    assert hist[0].data.get("bibjson", {}).get("title") == "Example 2 article with a fulltext url"
def test_01(self):
    # make ourselves an example article
    a = models.Article()
    b = a.bibjson()
    b.title = "Example article with a fulltext url"
    b.add_url("http://examplejournal.telfor.rs/Published/Vol1No1/Vol1No1_A5.pdf", urltype="fulltext")
    a.save()

    # pause to allow the index time to catch up
    time.sleep(2)

    # create a replacement article
    z = models.Article()
    y = z.bibjson()
    y.title = "Replacement article for fulltext url"
    y.add_url("http://examplejournal.telfor.rs/Published/Vol1No1/Vol1No1_A5.pdf", urltype="fulltext")

    # get the xwalk to determine if there is a duplicate
    xwalk = article.XWalk()
    d = xwalk.get_duplicate(z)

    assert d is not None
    assert d.bibjson().title == "Example article with a fulltext url"
def test_02_merge(self):
    # make ourselves an example article
    a = models.Article()
    b = a.bibjson()
    b.title = "Example 2 article with a fulltext url"
    b.add_url("http://examplejournal.telfor.rs/Published/Vol1No1/Vol1No1_A5.pdf", urltype="fulltext")
    a.save()

    # create a replacement article
    z = models.Article()
    y = z.bibjson()
    y.title = "Replacement article for fulltext url"
    y.add_url("http://examplejournal.telfor.rs/Published/Vol1No1/Vol1No1_A5.pdf", urltype="fulltext")

    # do a merge
    z.merge(a)

    history_files = self.list_today_article_history_files()
    assert len(history_files) == 1
    with open(history_files[0], 'rb') as i:
        hist = json.loads(i.read())
    assert hist
    assert hist.get("bibjson", {}).get("title") == "Example 2 article with a fulltext url"
def test_04_with_doi_instead(self):
    """Detect a duplicate using the DOI field."""
    # make ourselves a couple of example articles
    a = models.Article()
    b = a.bibjson()
    b.title = "Example A article with a DOI"
    b.add_identifier('doi', "10.doi/123")
    a.save(blocking=True)

    # Wait a second to ensure the timestamps are different
    time.sleep(1.01)

    a2 = models.Article()
    b2 = a2.bibjson()
    b2.title = "Example B article with a DOI"
    b2.add_identifier('doi', "10.doi/123")
    a2.save(blocking=True)

    # create an article which should not be caught by the duplicate detection
    not_duplicate = models.Article()
    not_duplicate_bibjson = not_duplicate.bibjson()
    not_duplicate_bibjson.title = "Example C article with a DOI"
    not_duplicate_bibjson.add_identifier('doi', "10.doi/DIFFERENT")
    not_duplicate.save(blocking=True)

    # create a replacement article
    z = models.Article()
    y = z.bibjson()
    y.title = "Replacement article for DOI"
    y.add_identifier('doi', "10.doi/123")

    # determine if there's a duplicate
    articleService = DOAJ.articleService()
    dups = articleService.get_duplicates(z)
    assert len(dups) == 2

    # Check when we ask for one duplicate we get the most recent duplicate.
    d = articleService.get_duplicate(z)
    assert d is not None
    assert d.bibjson().title == "Example B article with a DOI", d.bibjson().title

    # use the article service to determine all duplicates;
    # sort both results and expectations (by id, for a deterministic order) to avoid a false alarm -
    # we don't care about the order of duplicates
    expected = sorted([a, a2], key=lambda x: x.id)

    # determine if there's a duplicate
    l = articleService.get_duplicates(z)
    assert isinstance(l, list)
    assert l
    assert len(l) == 2
    l.sort(key=lambda x: x.id)
    assert expected == l
def test_03_retrieve_multiple_conflict(self):
    ftu = "http://www.sbe.deu.edu.tr/dergi/cilt15.say%C4%B12/06%20AKALIN.pdf"
    # make ourselves a couple of example articles
    a = models.Article()
    b = a.bibjson()
    b.title = "Example A article with a fulltext url"
    b.add_url(ftu, urltype="fulltext")
    a.save(blocking=True)

    # Wait a second to ensure the timestamps are different
    time.sleep(1.01)

    a2 = models.Article()
    b2 = a2.bibjson()
    b2.title = "Example B article with a fulltext url"
    b2.add_url(ftu, urltype="fulltext")
    a2.save(blocking=True)

    # create an article which should not be caught by the duplicate detection
    not_duplicate = models.Article()
    not_duplicate_bibjson = not_duplicate.bibjson()
    not_duplicate_bibjson.title = "Example C article with a fulltext url"
    not_duplicate_bibjson.add_url("http://this.is/a/different/url", urltype="fulltext")
    not_duplicate.save(blocking=True)

    # create a replacement article
    z = models.Article()
    y = z.bibjson()
    y.title = "Replacement article for fulltext url"
    y.add_url(ftu, urltype="fulltext")

    # determine that there are multiple duplicates
    articleService = DOAJ.articleService()
    with self.assertRaises(ArticleMergeConflict):
        d = articleService.get_duplicate(z)

    # use the article service to determine all duplicates;
    # sort both results and expectations here to avoid a false alarm -
    # we don't care about the order of duplicates
    expected = [a, a2]
    expected.sort(key=lambda x: datetime.strptime(x.last_updated, "%Y-%m-%dT%H:%M:%SZ"), reverse=True)

    # determine if there's a duplicate
    l = articleService.get_duplicates(z)
    assert isinstance(l, list), l
    assert l is not None
    l.sort(key=lambda x: datetime.strptime(x.last_updated, "%Y-%m-%dT%H:%M:%SZ"), reverse=True)
    assert expected == l
def test_03_retrieve_latest(self):
    ftu = "http://www.sbe.deu.edu.tr/dergi/cilt15.say%C4%B12/06%20AKALIN.pdf"
    # make ourselves a couple of example articles
    a = models.Article()
    b = a.bibjson()
    b.title = "Example A article with a fulltext url"
    b.add_url(ftu, urltype="fulltext")
    a.save(blocking=True)

    # Wait a second to ensure the timestamps are different
    time.sleep(1.01)

    a2 = models.Article()
    b2 = a2.bibjson()
    b2.title = "Example B article with a fulltext url"
    b2.add_url(ftu, urltype="fulltext")
    a2.save(blocking=True)

    # create an article which should not be caught by the duplicate detection
    not_duplicate = models.Article()
    not_duplicate_bibjson = not_duplicate.bibjson()
    not_duplicate_bibjson.title = "Example C article with a fulltext url"
    not_duplicate_bibjson.add_url("http://this.is/a/different/url", urltype="fulltext")
    not_duplicate.save(blocking=True)

    # create a replacement article
    z = models.Article()
    y = z.bibjson()
    y.title = "Replacement article for fulltext url"
    y.add_url(ftu, urltype="fulltext")

    # determine if there's a duplicate
    articleService = DOAJ.articleService()
    d = articleService.get_duplicate(z)

    # Check when we ask for one duplicate we get the most recent duplicate.
    assert d is not None
    assert d.bibjson().title == "Example B article with a fulltext url", d.bibjson().title

    # use the article service to determine all duplicates;
    # sort both results and expectations (by id, for a deterministic order) to avoid a false alarm -
    # we don't care about the order of duplicates
    expected = sorted([a, a2], key=lambda x: x.id)

    # determine if there's a duplicate
    l = articleService.get_duplicates(z)
    assert isinstance(l, list), l
    assert l is not None
    l.sort(key=lambda x: x.id)
    assert expected == l
def to_article_model(self, existing=None):
    dat = deepcopy(self.data)

    if "journal" in dat["bibjson"] and "start_page" in dat["bibjson"].get("journal", {}):
        dat["bibjson"]["start_page"] = dat["bibjson"]["journal"]["start_page"]
        del dat["bibjson"]["journal"]["start_page"]
    if "journal" in dat["bibjson"] and "end_page" in dat["bibjson"].get("journal", {}):
        dat["bibjson"]["end_page"] = dat["bibjson"]["journal"]["end_page"]
        del dat["bibjson"]["journal"]["end_page"]

    if existing is None:
        return models.Article(**dat)
    else:
        merged = dataobj.merge_outside_construct(self._struct, dat, existing.data)
        return models.Article(**merged)
def test_09_article(self):
    """test if the OAI-PMH article feed returns records and only displays articles accepted in DOAJ"""
    article_source = ArticleFixtureFactory.make_article_source(eissn='1234-1234', pissn='5678-5678', in_doaj=False)
    a_private = models.Article(**article_source)
    ba = a_private.bibjson()
    ba.title = "Private Article"
    a_private.save(blocking=True)

    article_source = ArticleFixtureFactory.make_article_source(eissn='4321-4321', pissn='8765-8765', in_doaj=True)
    a_public = models.Article(**article_source)
    ba = a_public.bibjson()
    ba.title = "Public Article"
    a_public.save(blocking=True)
    public_id = a_public.id

    time.sleep(1)

    with self.app_test.test_request_context():
        with self.app_test.test_client() as t_client:
            resp = t_client.get(url_for('oaipmh.oaipmh', specified='article', verb='ListRecords', metadataPrefix='oai_dc'))
            assert resp.status_code == 200

            t = etree.fromstring(resp.data)
            records = t.xpath('/oai:OAI-PMH/oai:ListRecords', namespaces=self.oai_ns)

            # Check we only have one article returned
            r = records[0].xpath('//oai:record', namespaces=self.oai_ns)
            assert len(r) == 1

            # Check we have the correct article
            title = r[0].xpath('//dc:title', namespaces=self.oai_ns)[0].text

            # check orcid_id xwalk
            assert str(records[0].xpath('//dc:creator/@id', namespaces=self.oai_ns)[0]) == a_public.bibjson().author[0].get("orcid_id")
            assert records[0].xpath('//dc:title', namespaces=self.oai_ns)[0].text == a_public.bibjson().title

            resp = t_client.get(url_for('oaipmh.oaipmh', specified='article', verb='GetRecord', metadataPrefix='oai_dc') + '&identifier=abcdefghijk_article')
            assert resp.status_code == 200

            t = etree.fromstring(resp.data)
            records = t.xpath('/oai:OAI-PMH/oai:GetRecord', namespaces=self.oai_ns)

            # Check we only have one article returned
            kids = records[0].getchildren()
            r = records[0].xpath('//oai:record', namespaces=self.oai_ns)
            assert len(r) == 1

            # Check we have the correct article
            assert records[0].xpath('//dc:title', namespaces=self.oai_ns)[0].text == a_public.bibjson().title
def minimise_article(full_article):
    # we want to keep the id and the bibjson
    id = full_article.id
    bibjson = deepcopy(full_article.bibjson())

    # remove the issns from the bibjson
    bibjson.remove_identifiers(idtype=bibjson.P_ISSN)
    bibjson.remove_identifiers(idtype=bibjson.E_ISSN)

    # remove all the journal metadata
    bibjson.remove_journal_metadata()

    # remove all the subject classifications
    bibjson.remove_subjects()

    # remove the year and the month (they are held elsewhere in this case)
    del bibjson.month
    del bibjson.year

    # create a minimised version of the article
    minimised = models.Article()
    minimised.set_id(id)
    minimised.set_bibjson(bibjson)

    return minimised
def setUp(self):
    super(TestTaskJournalBulkDelete, self).setUp()

    self.journals = []
    self.articles = []
    for j_src in JournalFixtureFactory.make_many_journal_sources(count=TEST_JOURNAL_COUNT):
        j = models.Journal(**j_src)
        self.journals.append(j)
        j.save()
        for i in range(0, TEST_ARTICLES_PER_JOURNAL):
            a = models.Article(**ArticleFixtureFactory.make_article_source(
                with_id=False, eissn=j.bibjson().first_eissn, pissn=j.bibjson().first_pissn))
            a.save()
            self.articles.append(a)
    sleep(2)

    self.forbidden_accounts = [
        AccountFixtureFactory.make_editor_source()['id'],
        AccountFixtureFactory.make_assed1_source()['id'],
        AccountFixtureFactory.make_assed2_source()['id'],
        AccountFixtureFactory.make_assed3_source()['id']
    ]

    self._make_and_push_test_context(acc=models.Account(**AccountFixtureFactory.make_managing_editor_source()))
def test_07_retrieve_article_fail(self):
    # set up all the bits we need
    # add a journal to the account
    account = models.Account()
    account.set_id('test')
    account.set_name("Tester")
    account.set_email("*****@*****.**")

    journal = models.Journal(**JournalFixtureFactory.make_journal_source(in_doaj=True))
    journal.set_owner(account.id)
    journal.save()
    time.sleep(1)

    data = ArticleFixtureFactory.make_article_source()
    data['admin']['in_doaj'] = False
    ap = models.Article(**data)
    ap.save()
    time.sleep(1)

    # should fail when no user and in_doaj is False
    with self.assertRaises(Api401Error):
        a = ArticlesCrudApi.retrieve(ap.id, None)

    # wrong user
    account = models.Account()
    account.set_id("asdklfjaioefwe")
    with self.assertRaises(Api404Error):
        a = ArticlesCrudApi.retrieve(ap.id, account)

    # non-existent article
    account = models.Account()
    account.set_id(ap.id)
    with self.assertRaises(Api404Error):
        a = ArticlesCrudApi.retrieve("ijsidfawefwefw", account)
def test_06_crossref_article_xml_xwalk(self):
    handle = CrossrefArticleFixtureFactory.upload_2_issns_correct()
    xwalk = CrossrefXWalk()
    art = xwalk.crosswalk_file(file_handle=handle, add_journal_info=False)
    article = models.Article(**art[0])
    bibjson = article.bibjson()

    assert bibjson.journal_title == "2 ISSNs Correct", "expected '2 ISSNs Correct', received: {}".format(bibjson.journal_title)
    assert bibjson.get_one_identifier(bibjson.P_ISSN) == "1234-5678", "expected '1234-5678', received: {}".format(bibjson.get_one_identifier(bibjson.P_ISSN))
    assert bibjson.get_one_identifier(bibjson.E_ISSN) == "9876-5432", "expected '9876-5432', received: {}".format(bibjson.get_one_identifier(bibjson.E_ISSN))
    assert bibjson.year == "2004", "expected '2004', received: {}".format(bibjson.year)
    assert bibjson.title == "Article 12292005 9:32", "expected 'Article 12292005 9:32', received: {}".format(bibjson.title)
    assert bibjson.author == [{'name': 'Surname, Bob'}], "expected [{{'name': 'Surname, Bob'}}], received: {}".format(bibjson.author)
    assert bibjson.get_single_url("fulltext") == "http://www.crossref.org/", "expected 'http://www.crossref.org/', received: {}".format(bibjson.get_single_url("fulltext"))
def test_05_doaj_article_xml_xwalk(self):
    handle = DoajXmlArticleFixtureFactory.upload_2_issns_correct()
    xwalk = DOAJXWalk()
    art = xwalk.crosswalk_file(file_handle=handle, add_journal_info=False)
    article = models.Article(**art[0])
    bibjson = article.bibjson()

    assert bibjson.journal_language == ["fre"], "expected ['fre'], received: {}".format(bibjson.journal_language)
    assert bibjson.publisher == "Codicille éditeur et CRILCQ", "expected 'Codicille éditeur et CRILCQ', received: {}".format(bibjson.publisher)
    assert bibjson.journal_title == "2 ISSNs Correct", "expected '2 ISSNs Correct', received: {}".format(bibjson.journal_title)
    assert bibjson.get_one_identifier(bibjson.P_ISSN) == "1234-5678", "expected '1234-5678', received: {}".format(bibjson.get_one_identifier(bibjson.P_ISSN))
    assert bibjson.get_one_identifier(bibjson.E_ISSN) == "9876-5432", "expected '9876-5432', received: {}".format(bibjson.get_one_identifier(bibjson.E_ISSN))
    assert bibjson.year == "2013", "expected '2013', received: {}".format(bibjson.year)
    assert bibjson.title == "Imaginaires autochtones contemporains. Introduction", "expected 'Imaginaires autochtones contemporains. Introduction', received: {}".format(bibjson.title)
    assert bibjson.author == [{'name': 'Papillon, Joëlle'}], "expected [{{'name': 'Papillon, Joëlle'}}], received: {}".format(bibjson.author)
    assert bibjson.get_single_url("fulltext") == "http://doaj.org/testing/url.pdf", "expected 'http://doaj.org/testing/url.pdf', received: {}".format(bibjson.get_single_url("fulltext"))
def test_01_withdraw_task(self):
    sources = JournalFixtureFactory.make_many_journal_sources(10, in_doaj=True)

    ids = []
    articles = []
    for source in sources:
        j = models.Journal(**source)
        j.save()
        ids.append(j.id)

        pissn = j.bibjson().get_identifiers(j.bibjson().P_ISSN)
        eissn = j.bibjson().get_identifiers(j.bibjson().E_ISSN)
        asource = ArticleFixtureFactory.make_article_source(pissn=pissn[0], eissn=eissn[0], with_id=False)
        a = models.Article(**asource)
        a.save()
        articles.append(a.id)
    time.sleep(2)

    job = SetInDOAJBackgroundTask.prepare("testuser", journal_ids=ids, in_doaj=False)
    SetInDOAJBackgroundTask.submit(job)
    time.sleep(2)

    for id in ids:
        j = models.Journal.pull(id)
        assert j.is_in_doaj() is False
    for id in articles:
        a = models.Article.pull(id)
        assert a.is_in_doaj() is False
def test_14_article_model_index(self):
    """Check that the article's index section is generated"""
    a = models.Article(**ArticleFixtureFactory.make_article_source())
    assert a.data.get('index', None) is None

    # Generate the index
    a.prep()
    assert a.data.get('index', None) is not None
def test_33_article_stats(self):
    articles = []

    # make a bunch of articles variably in doaj/not in doaj, for/not for the issn we'll search
    for i in range(1, 3):
        article = models.Article(**ArticleFixtureFactory.make_article_source(
            eissn="1111-1111", pissn="1111-1111", with_id=False, in_doaj=True))
        article.set_created("2019-01-0" + str(i) + "T00:00:00Z")
        articles.append(article)
    for i in range(3, 5):
        article = models.Article(**ArticleFixtureFactory.make_article_source(
            eissn="1111-1111", pissn="1111-1111", with_id=False, in_doaj=False))
        article.set_created("2019-01-0" + str(i) + "T00:00:00Z")
        articles.append(article)
    for i in range(5, 7):
        article = models.Article(**ArticleFixtureFactory.make_article_source(
            eissn="2222-2222", pissn="2222-2222", with_id=False, in_doaj=True))
        article.set_created("2019-01-0" + str(i) + "T00:00:00Z")
        articles.append(article)
    for i in range(7, 9):
        article = models.Article(**ArticleFixtureFactory.make_article_source(
            eissn="2222-2222", pissn="2222-2222", with_id=False, in_doaj=False))
        article.set_created("2019-01-0" + str(i) + "T00:00:00Z")
        articles.append(article)

    for i in range(len(articles)):
        articles[i].save(blocking=(i == len(articles) - 1))

    journal = models.Journal()
    bj = journal.bibjson()
    bj.add_identifier(bj.P_ISSN, "1111-1111")

    stats = journal.article_stats()
    assert stats.get("total") == 2
    assert stats.get("latest") == "2019-01-02T00:00:00Z"
def test_10_journal_deletes(self):
    # tests the various methods that are key to journal deletes

    # populate the index with some journals
    for i in range(5):
        j = models.Journal()
        j.set_in_doaj(True)
        bj = j.bibjson()
        bj.title = "Test Journal {x}".format(x=i)
        bj.add_identifier(bj.P_ISSN, "{x}000-0000".format(x=i))
        bj.publisher = "Test Publisher {x}".format(x=i)
        bj.add_url("http://homepage.com/{x}".format(x=i), "homepage")
        j.save()

        # make sure the last updated dates are suitably different
        time.sleep(0.66)

    # populate the index with some articles
    for i in range(5):
        a = models.Article()
        a.set_in_doaj(True)
        bj = a.bibjson()
        bj.title = "Test Article {x}".format(x=i)
        bj.add_identifier(bj.P_ISSN, "{x}000-0000".format(x=i))
        bj.publisher = "Test Publisher {x}".format(x=i)
        a.save()

        # make sure the last updated dates are suitably different
        time.sleep(0.66)

    # now hit the key methods involved in journal deletes
    query = {
        "query": {
            "bool": {
                "must": [
                    {"term": {"bibjson.title.exact": "Test Journal 1"}}
                ]
            }
        }
    }

    count = models.Journal.hit_count(query)
    assert count == 1

    issns = models.Journal.issns_by_query(query)
    assert len(issns) == 1
    assert "1000-0000" in issns

    models.Journal.delete_selected(query, articles=True)
    time.sleep(1)

    assert len(models.Article.all()) == 4
    assert len(self.list_today_article_history_files()) == 1

    assert len(models.Journal.all()) == 4
    assert len(self.list_today_journal_history_files()) == 6  # Because all journals are snapshot at create time
def test_02_toc_requirements(self):
    """Check that everything we need for ToCs is in the article models"""
    a = models.Article(**ArticleFixtureFactory.make_article_source())
    a.prep()

    # To build ToCs we need a volume, an issue, a year and a month.
    assert a.data['bibjson']['journal']['volume'] == '1'
    assert a.data['bibjson']['journal']['number'] == '99'
    assert a.data['index']['date'] == "1991-01-01T00:00:00Z"
    assert a.data['index']['date_toc_fv_month'] == a.data['index']['date'] == "1991-01-01T00:00:00Z"
def test_06_merge_replaces_metadata(self):
    """Ensure that merging replaces metadata of a new article, but keeps its old id."""
    ftu = "http://www.sbe.deu.edu.tr/dergi/cilt15.say%C4%B12/06%20AKALIN.pdf"

    id1 = uuid.uuid4().hex
    id2 = uuid.uuid4().hex
    assert id1 != id2

    a = models.Article()
    a.set_id(id1)
    b = a.bibjson()
    b.title = "Example A article with a fulltext url"
    b.abstract = "a bunch of text"
    b.add_url(ftu, urltype="fulltext")

    a2 = models.Article()
    a2.set_id(id2)
    b2 = a2.bibjson()
    b2.title = "Example B article with a fulltext url"
    b2.add_url(ftu, urltype="fulltext")

    # perform a merge, which updates a2's record based on a - including the id.
    assert a2.id == id2
    a2.merge(a)
    assert a2.id == id1

    # Check that we have the newer metadata
    assert a2.bibjson().title == "Example B article with a fulltext url"
    assert a2.bibjson().abstract is None

    # Create a 3rd article without an explicit id
    a3 = models.Article()
    b3 = a3.bibjson()
    b3.title = "Example C article with a fulltext url"
    b3.abstract = "a newer bunch of text"
    b3.add_url(ftu, urltype="fulltext")

    a3.merge(a2)
    assert a3.id == a2.id == a.id
    assert a3.bibjson().title == "Example C article with a fulltext url"
    assert a3.bibjson().abstract == "a newer bunch of text"