def test_success_create_global_url(self):
    """Constructing a DblpIngester on an empty ``global_url`` table adds the DBLP row.

    Also checks that the ingester reports that row as its global URL and
    that ``harvester_db`` is ``"harvester"`` when none is passed.
    """
    # Precondition: nothing registered yet.
    self.assertEqual(global_url.objects.count(), 0)

    ingester = DblpIngester("Hello")

    created_row = global_url.objects.get(id=1)
    self.assertEqual(created_row.domain, 'http://dblp.uni-trier.de')
    self.assertEqual(ingester.get_global_url().id, 1)
    self.assertEqual(ingester.harvester_db, "harvester")
def test_success(self):
    """Ingest ``dblp_test1.csv`` and verify the rows written to every table.

    Expected rows are listed in primary-key order (ids start at 1) and
    compared against each model's ``test()`` output.
    """
    setup_tables(os.path.join(test_path, "dblp_test1.csv"), DBLP_ARTICLE, ADD_DBLP_ARTICLE)
    ingester = DblpIngester("dblp.ingester", harvesterdb="test_storage")
    self.assertEqual(ingester.get_global_url().id, 3)
    self.assertEqual(ingest_data(ingester), 2)

    def assert_rows(model, expected_rows):
        # Compare model.test() for ids 1..n; the message names the failing row.
        for pk, expected in enumerate(expected_rows, start=1):
            self.assertEqual(
                model.objects.get(id=pk).test(),
                expected,
                msg="{} id={}".format(model.__name__, pk),
            )

    # check local url
    assert_rows(local_url, [
        [3, 'journals/acta/AkyildizB89', 1, 1, None],
        [1, 'TODO PLATZHALTER', 1, 1, None],
        [3, 'journals/acta/VoglerS014', 1, 1, None],
        [1, 'TODO PLATZHALTER', 1, 1, None],
    ])

    # check authors_model
    assert_rows(authors_model, [
        ["Ian F. Akyildiz", "ian f akyildiz"],
        ["Horst von Brand", "horst von brand"],
        ["Walter Vogler", "walter vogler"],
        ["Christian Stahl", "christian stahl"],
        ["Richard Müller", "richard muller"],
    ])

    # check author alias
    assert_rows(author_aliases, [
        [1, "Ian F. Akyildiz"],
        [2, "Horst von Brand"],
        [3, "Walter Vogler"],
        [4, "Christian Stahl"],
        [5, "Richard Müller 0001"],
        [5, "Richard Müller"],
    ])

    # cluster
    for pk, expected_name in enumerate(["bla bla bla", "kam kim kum"], start=1):
        self.assertEqual(cluster.objects.get(id=pk).name, expected_name)

    # author alias source
    assert_rows(author_alias_source, [
        [1, 1],
        [2, 1],
        [3, 3],
        [4, 3],
        [5, 3],
        [6, 3],
    ])

    # publication authors
    assert_rows(publication_author, [
        [1, 1, 0],
        [1, 2, 1],
        [2, 1, 0],
        [2, 2, 1],
        [3, 3, 0],
        [3, 4, 1],
        [3, 5, 2],
        [4, 3, 0],
        [4, 4, 1],
        [4, 5, 2],
    ])

    # limbo
    self.assertEqual(limbo_authors.objects.count(), 0)
    self.assertEqual(limbo_pub.objects.count(), 0)

    # publication
    assert_rows(publication, [
        [1, "Bla Bla Bla"],
        [2, "Kam? Kim! Kum."],
    ])

    # check if last harvested is set
    # NOTE(review): compares calendar days only, so a run that straddles
    # midnight could fail spuriously -- confirm whether that matters here.
    harvested_rows = list(get_table_data("dblp_article", null_dates=False))
    self.assertEqual(
        harvested_rows[0][-1].strftime("%Y-%m-%d"),
        datetime.datetime.now().strftime("%Y-%m-%d"),
    )

    # check open references
    self.assertEqual(OpenReferences.objects.count(), 0)
def test_success_limit(self):
    """With a limit of 1, ``ingest_data`` reports exactly one ingested record."""
    fixture_csv = os.path.join(test_path, "dblp_test1.csv")
    setup_tables(fixture_csv, DBLP_ARTICLE, ADD_DBLP_ARTICLE)

    limited_ingester = DblpIngester("dblp.ingester", harvesterdb="test_storage")
    limited_ingester.set_limit(1)

    self.assertEqual(ingest_data(limited_ingester), 1)
    # check open references
    self.assertEqual(OpenReferences.objects.count(), 0)
def test_limbo_alias(self):
    """Ingest ``dblp_test3.csv`` and check the resulting row counts per table."""
    fixture_csv = os.path.join(test_path, "dblp_test3.csv")
    setup_tables(fixture_csv, DBLP_ARTICLE, ADD_DBLP_ARTICLE)
    ingest_data(DblpIngester("dblp.ingester", harvesterdb="test_storage"))

    expected_counts = (
        (limbo_pub, 0),
        (cluster, 3),
        (authors_model, 5),
        # check open references
        (OpenReferences, 0),
    )
    for model, expected in expected_counts:
        self.assertEqual(model.objects.count(), expected)
def test_limbo_multi_pubs(self):
    """Two pre-existing publications in one cluster send the record to limbo.

    The limbo entry is expected to carry ``Reason.AMB_PUB``, both authors
    end up in ``limbo_authors``, and no further ``local_url`` is created.
    """
    fixture_csv = os.path.join(test_path, "dblp_test2.csv")
    setup_tables(fixture_csv, DBLP_ARTICLE, ADD_DBLP_ARTICLE)

    # Seed one cluster holding two identically titled publications.
    shared_cluster = cluster.objects.create(id=1, name="title")
    dummy_global = global_url.objects.create(
        id=5,
        domain="http://dummy.de",
        url="http://dummy.de",
    )
    dummy_local = local_url.objects.create(id=1, url="jlkjöl", global_url=dummy_global)
    publication.objects.bulk_create([
        publication(local_url=dummy_local, cluster=shared_cluster, title="Title")
        for _ in range(2)
    ])

    ingest_data(DblpIngester("dblp.ingester", harvesterdb="test_storage"))

    limbo_entry = limbo_pub.objects.get(id=1).test_extended()
    self.assertEqual(limbo_entry[0], 'Reason.AMB_PUB')

    expected_authors = (
        [1, 'None', "An Author", 0],
        [1, 'None', "Another Author", 1],
    )
    for pk, expected in enumerate(expected_authors, start=1):
        self.assertEqual(limbo_authors.objects.get(id=pk).test(), expected)

    self.assertEqual(local_url.objects.count(), 1)
    # check open references
    self.assertEqual(OpenReferences.objects.count(), 0)
def test_limbo_multi_cluster(self):
    """Two clusters with the same name send the ingested record to limbo.

    Checks that both authors are stored in ``limbo_authors``, that no
    ``local_url`` is created, and that the limbo publication's
    ``test_extended()`` output matches the expected row exactly
    (first element ``'Reason.AMB_CLUSTER'``).
    """
    setup_tables(os.path.join(test_path, "dblp_test2.csv"), DBLP_ARTICLE, ADD_DBLP_ARTICLE)
    # Seed two clusters that share a name.
    cluster.objects.bulk_create([
        cluster(id=1, name="title"),
        cluster(id=2, name="title"),
    ])

    ingester = DblpIngester("dblp.ingester", harvesterdb="test_storage")
    ingest_data(ingester)

    self.assertEqual(limbo_authors.objects.get(id=1).test(), [1, 'None', "An Author", 0])
    self.assertEqual(limbo_authors.objects.get(id=2).test(), [1, 'None', "Another Author", 1])
    self.assertEqual(local_url.objects.count(), 0)

    limbo = limbo_pub.objects.get(id=1).test_extended()
    compare = [
        'Reason.AMB_CLUSTER', 'key', "title", "1-5", None, "doi", None, None,
        None, datetime.date(1990, 1, 1), "1", "2", "series",
        None, "publisher", None, "school", "address",
        "isbn", None, "booktitle", "journal",
    ]
    self.assertEqual(limbo, compare)
def test_complete_publication(self):
    """Check that all values of a fully populated record are transferred.

    For this test a dataset with ALL ROWS filled is ingested and the first
    publication's attributes are compared one by one.
    """
    fixture_csv = os.path.join(test_path, "dblp_test2.csv")
    setup_tables(fixture_csv, DBLP_ARTICLE, ADD_DBLP_ARTICLE)
    ingest_data(DblpIngester("dblp.ingester", harvesterdb="test_storage"))

    stored = publication.objects.first()
    expected_attributes = (
        ("title", "title"),
        ("pages", "1-5"),
        ("doi", "doi"),
        ("abstract", None),
        ("copyright", None),
        ("volume", "1"),
        ("number", "2"),
        ("note", None),
        ("date_added", None),
        ("date_published", datetime.date(1990, 1, 1)),
    )
    for attribute, expected in expected_attributes:
        self.assertEqual(getattr(stored, attribute), expected)

    # check open references
    self.assertEqual(OpenReferences.objects.count(), 0)
def test_success_reversed(self):
    """Ingest arXiv first, then DBLP, and verify the merged publication.

    Both ingesters are expected to report one record each; the checks then
    cover table sizes, the three local URLs, the merged publication's
    attributes, its difference store and the open reference.
    """
    dblp_source = DblpIngester("dblp.ingester", harvesterdb="test_storage")
    arxiv_source = ArxivIngester("arxiv.ingester", harvester_db="test_storage")

    # arxiv first then dblp
    self.assertEqual(ingest_data(arxiv_source), 1)
    self.assertEqual(ingest_data(dblp_source), 1)

    # check all tables
    table_sizes = (
        (cluster, 1),
        (publication, 1),
        (local_url, 3),
        (global_url, 4),
        (limbo_authors, 0),
        (limbo_pub, 0),
        (pub_medium, 1),
    )
    for model, expected_size in table_sizes:
        self.assertEqual(model.objects.count(), expected_size)

    # check local url
    dblp_row = local_url.objects.get(id=3)
    merged_row = local_url.objects.get(id=2)
    arxiv_row = local_url.objects.get(id=1)
    article_type_id = publication_type.objects.get(name="article").id
    misc_type_id = publication_type.objects.get(name="misc").id
    self.assertEqual(dblp_row.test(), [3, "dblpkey", 1, article_type_id, None])
    self.assertEqual(arxiv_row.test(), [4, "arxivkey", None, misc_type_id, None])
    self.assertEqual(merged_row.test(), [1, "TODO PLATZHALTER", 1, misc_type_id, None])

    # check authors
    self.assertEqual(authors_model.objects.count(), 3)
    self.assertEqual(author_aliases.objects.count(), 3)
    self.assertEqual(author_alias_source.objects.count(), 5)
    # publication authors
    self.assertEqual(publication_author.objects.count(), 8)

    # check publication
    merged = publication.objects.first()
    expected_attributes = (
        ("title", "The Ultimate Title!"),  # from Arxiv
        ("pages", "10-14"),  # DBLP
        ("note", None),
        ("doi", "http://google.com"),  # Arxiv
        ("abstract", "this is a test"),  # arxiv
        ("copyright", None),
        ("date_added", None),
        ("date_published", datetime.date(2007, 1, 1)),  # DBLP
        ("volume", "2"),  # DBLP
        ("number", "3"),  # DBLP
    )
    for attribute, expected in expected_attributes:
        self.assertEqual(getattr(merged, attribute), expected)

    def entry(bitvector, value):
        # One expected difference-store entry; votes are always 0 here.
        return {"bitvector": bitvector, "votes": 0, "value": value}

    # check diff tree
    differences = deserialize_diff_store(merged.differences)
    self.assertEqual(differences["url_id"], [1, 3])
    self.assertEqual(
        differences["doi"],
        [entry(1, "http://google.com"), entry(2, "http://google.de")],
    )
    self.assertEqual(differences["copyright"], [])
    self.assertEqual(differences["type_ids"], [entry(1, 2), entry(2, 1)])
    self.assertEqual(differences["pages"], [entry(2, "10-14")])

    self.assertEqual(OpenReferences.objects.first().test(), [1, 'arxivkey', None])
def full_execution():
    """Run a DBLP ingestion capped at 1000 records.

    Builds the ingester without an explicit harvester database and passes
    it to ``ingest_data``. The ingestion result is not used.
    """
    ingester = DblpIngester("dblp.ingester")
    ingester.set_limit(1000)
    ingest_data(ingester)
def test_success_update_global_url(self):
    """An existing DBLP ``global_url`` row is the one the ingester reports.

    A row with id 100 and the DBLP domain is created up front; the
    ingester's global URL is then expected to have that id.
    """
    global_url.objects.create(
        id="100",
        domain='http://dblp.uni-trier.de',
        url='http://dblp.uni-trier.de/rec/xml/',
    )

    ingester = DblpIngester("Hello")

    self.assertEqual(ingester.get_global_url().id, 100)