Exemple #1
0
 def test_success_create_global_url(self):
     """Constructing a DblpIngester with no global_url rows present yields the DBLP row at id 1."""
     # Precondition: the global_url table starts out empty.
     self.assertEqual(global_url.objects.count(), 0)

     ingester = DblpIngester("Hello")

     created = global_url.objects.get(id=1)
     self.assertEqual(created.domain, 'http://dblp.uni-trier.de')
     self.assertEqual(ingester.get_global_url().id, 1)
     # No harvester database was passed, so the attribute holds "harvester".
     self.assertEqual(ingester.harvester_db, "harvester")
Exemple #2
0
    def test_success(self):
        """Ingest dblp_test1.csv and verify the rows found in each checked table."""

        def assert_rows(model, expected_rows):
            # Compare model.objects.get(id=n).test() against the n-th entry (ids start at 1).
            for pk, expected in enumerate(expected_rows, start=1):
                self.assertEqual(model.objects.get(id=pk).test(), expected)

        csv_path = os.path.join(test_path, "dblp_test1.csv")
        setup_tables(csv_path, DBLP_ARTICLE, ADD_DBLP_ARTICLE)
        dblp = DblpIngester("dblp.ingester", harvesterdb="test_storage")
        self.assertEqual(dblp.get_global_url().id, 3)
        ingested = ingest_data(dblp)
        self.assertEqual(ingested, 2)

        # local urls
        assert_rows(local_url, [
            [3, 'journals/acta/AkyildizB89', 1, 1, None],
            [1, 'TODO PLATZHALTER', 1, 1, None],
            [3, 'journals/acta/VoglerS014', 1, 1, None],
            [1, 'TODO PLATZHALTER', 1, 1, None],
        ])

        # authors
        assert_rows(authors_model, [
            ["Ian F. Akyildiz", "ian f akyildiz"],
            ["Horst von Brand", "horst von brand"],
            ["Walter Vogler", "walter vogler"],
            ["Christian Stahl", "christian stahl"],
            ["Richard Müller", "richard muller"],
        ])

        # author aliases
        assert_rows(author_aliases, [
            [1, "Ian F. Akyildiz"],
            [2, "Horst von Brand"],
            [3, "Walter Vogler"],
            [4, "Christian Stahl"],
            [5, "Richard Müller 0001"],
            [5, "Richard Müller"],
        ])

        # clusters (compared by name rather than by .test())
        for pk, name in enumerate(["bla bla bla", "kam kim kum"], start=1):
            self.assertEqual(cluster.objects.get(id=pk).name, name)

        # author alias sources
        assert_rows(author_alias_source, [
            [1, 1],
            [2, 1],
            [3, 3],
            [4, 3],
            [5, 3],
            [6, 3],
        ])

        # publication authors
        assert_rows(publication_author, [
            [1, 1, 0],
            [1, 2, 1],
            [2, 1, 0],
            [2, 2, 1],
            [3, 3, 0],
            [3, 4, 1],
            [3, 5, 2],
            [4, 3, 0],
            [4, 4, 1],
            [4, 5, 2],
        ])

        # nothing may end up in limbo
        self.assertEqual(limbo_authors.objects.count(), 0)
        self.assertEqual(limbo_pub.objects.count(), 0)

        # publications
        assert_rows(publication, [
            [1, "Bla Bla Bla"],
            [2, "Kam? Kim! Kum."],
        ])

        # the last column of the first dblp_article row must carry today's date
        day_format = "%Y-%m-%d"
        harvested_rows = list(get_table_data("dblp_article", null_dates=False))
        harvested_on = harvested_rows[0][-1].strftime(day_format)
        today = datetime.datetime.now().strftime(day_format)
        self.assertEqual(harvested_on, today)

        # no open references expected
        self.assertEqual(OpenReferences.objects.count(), 0)
Exemple #3
0
 def test_success_limit(self):
     """With the ingester limit set to 1, ingest_data reports 1 for dblp_test1.csv."""
     csv_path = os.path.join(test_path, "dblp_test1.csv")
     setup_tables(csv_path, DBLP_ARTICLE, ADD_DBLP_ARTICLE)

     limited = DblpIngester("dblp.ingester", harvesterdb="test_storage")
     limited.set_limit(1)
     ingested = ingest_data(limited)

     self.assertEqual(ingested, 1)
     # no open references expected
     self.assertEqual(OpenReferences.objects.count(), 0)
Exemple #4
0
    def test_limbo_alias(self):
        """Ingest dblp_test3.csv and verify the resulting row counts per table."""
        csv_path = os.path.join(test_path, "dblp_test3.csv")
        setup_tables(csv_path, DBLP_ARTICLE, ADD_DBLP_ARTICLE)
        ingest_data(DblpIngester("dblp.ingester", harvesterdb="test_storage"))

        # (model, expected number of rows); the last entry checks open references
        expected_counts = (
            (limbo_pub, 0),
            (cluster, 3),
            (authors_model, 5),
            (OpenReferences, 0),
        )
        for model, expected in expected_counts:
            # The model name is passed as msg so a failure still names the table.
            self.assertEqual(model.objects.count(), expected, model.__name__)
Exemple #5
0
 def test_limbo_multi_pubs(self):
     """Two pre-existing publications in one cluster send the ingested entry to limbo (AMB_PUB)."""
     csv_path = os.path.join(test_path, "dblp_test2.csv")
     setup_tables(csv_path, DBLP_ARTICLE, ADD_DBLP_ARTICLE)

     # Seed one cluster holding two identical publications behind the same local url.
     seeded_cluster = cluster.objects.create(id=1, name="title")
     dummy_global = global_url.objects.create(
         id=5,
         domain="http://dummy.de",
         url="http://dummy.de",
     )
     dummy_local = local_url.objects.create(id=1, url="jlkjöl", global_url=dummy_global)
     duplicates = [
         publication(local_url=dummy_local, cluster=seeded_cluster, title="Title")
         for _ in range(2)
     ]
     publication.objects.bulk_create(duplicates)

     ingest_data(DblpIngester("dblp.ingester", harvesterdb="test_storage"))

     limbo_entry = limbo_pub.objects.get(id=1).test_extended()
     self.assertEqual(limbo_entry[0], 'Reason.AMB_PUB')
     expected_limbo_authors = (
         [1, 'None', "An Author", 0],
         [1, 'None', "Another Author", 1],
     )
     for pk, expected in enumerate(expected_limbo_authors, start=1):
         self.assertEqual(limbo_authors.objects.get(id=pk).test(), expected)
     # Only the seeded local url may exist; ingestion must not have added another.
     self.assertEqual(local_url.objects.count(), 1)
     # no open references expected
     self.assertEqual(OpenReferences.objects.count(), 0)
Exemple #6
0
 def test_limbo_multi_cluster(self):
     """Two pre-existing clusters named "title" send the ingested entry to limbo (AMB_CLUSTER).

     After ingesting dblp_test2.csv the test checks the two limbo author rows,
     that no local url was created, and the full ``test_extended()`` output of
     the limbo publication.
     """
     setup_tables(os.path.join(test_path, "dblp_test2.csv"), DBLP_ARTICLE, ADD_DBLP_ARTICLE)
     # Seed two clusters sharing the same name.
     cluster.objects.bulk_create([
         cluster(id=1, name="title"),
         cluster(id=2, name="title"),
     ])
     ingester = DblpIngester("dblp.ingester", harvesterdb="test_storage")
     ingest_data(ingester)

     self.assertEqual(limbo_authors.objects.get(id=1).test(), [1, 'None', "An Author", 0])
     self.assertEqual(limbo_authors.objects.get(id=2).test(), [1, 'None', "Another Author", 1])
     self.assertEqual(local_url.objects.count(), 0)

     # The leftover debug print of the limbo row was removed; assertEqual
     # already reports both lists when they differ.
     limbo = limbo_pub.objects.get(id=1).test_extended()
     expected = [
         'Reason.AMB_CLUSTER', 'key', "title", "1-5", None, "doi", None, None,
         None, datetime.date(1990, 1, 1), "1", "2", "series",
         None, "publisher", None, "school", "address",
         "isbn", None, "booktitle", "journal",
     ]
     self.assertEqual(limbo, expected)
Exemple #7
0
 def test_complete_publication(self):
     """Ingest dblp_test2.csv and verify the fields of the first stored publication.

     The dataset is meant to have all of its rows filled, so this checks
     that every value is transferred successfully.
     """
     csv_path = os.path.join(test_path, "dblp_test2.csv")
     setup_tables(csv_path, DBLP_ARTICLE, ADD_DBLP_ARTICLE)
     ingest_data(DblpIngester("dblp.ingester", harvesterdb="test_storage"))

     stored = publication.objects.first()
     expected_fields = (
         ("title", "title"),
         ("pages", "1-5"),
         ("doi", "doi"),
         ("abstract", None),
         ("copyright", None),
         ("volume", "1"),
         ("number", "2"),
         ("note", None),
         ("date_added", None),
         ("date_published", datetime.date(1990, 1, 1)),
     )
     for field, expected in expected_fields:
         # The field name is passed as msg so a failure still names the attribute.
         self.assertEqual(getattr(stored, field), expected, field)

     # no open references expected
     self.assertEqual(OpenReferences.objects.count(), 0)
    def test_success_reversed(self):
        """Ingest arXiv first and DBLP second, then verify the merged publication and its diff store."""

        def diff_entry(bitvector, value):
            # One expected entry of a diff-store attribute list (votes are always 0 here).
            return {"bitvector": bitvector, "votes": 0, "value": value}

        # The DBLP ingester is constructed first even though it is run second.
        dblp = DblpIngester("dblp.ingester", harvesterdb="test_storage")
        arxiv = ArxivIngester("arxiv.ingester", harvester_db="test_storage")

        # arxiv first then dblp
        self.assertEqual(ingest_data(arxiv), 1)
        self.assertEqual(ingest_data(dblp), 1)

        # check all tables
        expected_counts = (
            (cluster, 1),
            (publication, 1),
            (local_url, 3),
            (global_url, 4),
            (limbo_authors, 0),
            (limbo_pub, 0),
            (pub_medium, 1),
        )
        for model, expected in expected_counts:
            # The model name is passed as msg so a failure still names the table.
            self.assertEqual(model.objects.count(), expected, model.__name__)

        # check local url
        dblp_row = local_url.objects.get(id=3)
        merged_row = local_url.objects.get(id=2)
        arxiv_row = local_url.objects.get(id=1)
        article_type_id = publication_type.objects.get(name="article").id
        self.assertEqual(dblp_row.test(), [3, "dblpkey", 1, article_type_id, None])
        misc_type_id = publication_type.objects.get(name="misc").id
        self.assertEqual(arxiv_row.test(), [4, "arxivkey", None, misc_type_id, None])
        self.assertEqual(merged_row.test(), [1, "TODO PLATZHALTER", 1, misc_type_id, None])

        # check authors
        self.assertEqual(authors_model.objects.count(), 3)
        self.assertEqual(author_aliases.objects.count(), 3)
        self.assertEqual(author_alias_source.objects.count(), 5)
        # publication authors
        self.assertEqual(publication_author.objects.count(), 8)

        # check publication
        merged = publication.objects.first()
        expected_fields = (
            ("title", "The Ultimate Title!"),  # from Arxiv
            ("pages", "10-14"),  # DBLP
            ("note", None),
            ("doi", "http://google.com"),  # Arxiv
            ("abstract", "this is a test"),  # arxiv
            ("copyright", None),
            ("date_added", None),
            ("date_published", datetime.date(2007, 1, 1)),  # DBLP
            ("volume", "2"),  # DBLP
            ("number", "3"),  # DBLP
        )
        for field, expected in expected_fields:
            self.assertEqual(getattr(merged, field), expected, field)

        # check diff tree
        diff = deserialize_diff_store(merged.differences)
        self.assertEqual(diff["url_id"], [1, 3])
        self.assertEqual(diff["doi"], [
            diff_entry(1, "http://google.com"),
            diff_entry(2, "http://google.de"),
        ])
        self.assertEqual(diff["copyright"], [])
        self.assertEqual(diff["type_ids"], [
            diff_entry(1, 2),
            diff_entry(2, 1),
        ])
        self.assertEqual(diff["pages"], [diff_entry(2, "10-14")])

        first_open_reference = OpenReferences.objects.first()
        self.assertEqual(first_open_reference.test(), [1, 'arxivkey', None])
Exemple #9
0
def full_execution():
    """Run the DBLP ingester with its limit set to 1000.

    Builds a ``DblpIngester`` named "dblp.ingester" with default settings,
    calls ``set_limit(1000)`` and hands it to ``ingest_data``. The value
    returned by ``ingest_data`` is discarded; this function returns ``None``.
    """
    ingester = DblpIngester("dblp.ingester")
    ingester.set_limit(1000)
    # The result was previously bound to an unused local; call for its effect only.
    ingest_data(ingester)
Exemple #10
0
 def test_success_update_global_url(self):
     """With a DBLP global_url row already stored under id 100, the ingester reports that id."""
     # The primary key is given as an integer, matching the integer
     # comparison below and the id usage in the sibling tests.
     global_url.objects.create(id=100,
                               domain='http://dblp.uni-trier.de',
                               url='http://dblp.uni-trier.de/rec/xml/')
     x = DblpIngester("Hello")
     self.assertEqual(x.get_global_url().id, 100)