Esempio n. 1
0
    def test_crawler_execute(self):
        """Run the test crawler once and check the states and collection it leaves behind.

        Asserts that no crawler state exists before the run, that two states
        exist afterwards, that the second state's metadata mentions 'kitty'
        in its title and 'demo.pdf' in its source path, and that the
        collection with foreign id 'test' holds exactly one document.
        """
        crawler = TDocumentCrawler()

        # Precondition: nothing has been recorded before the crawler runs.
        initial_count = CrawlerState.all().count()
        assert initial_count == 0, initial_count

        crawler.execute()

        recorded = CrawlerState.all().all()
        recorded_count = len(recorded)
        assert recorded_count == 2, recorded_count

        # NOTE(review): this picks the second row as returned by the query;
        # confirm the query order is stable enough to rely on.
        demo_state = recorded[1]
        assert 'kitty' in demo_state.meta['title'], demo_state.meta
        assert 'demo.pdf' in demo_state.meta['source_path'], demo_state.meta

        collection = Collection.by_foreign_id('test')
        assert collection is not None, collection
        documents = list(collection.documents)
        assert len(documents) == 1, documents
Esempio n. 2
0
    def test_crawler_execute(self):
        """Run the test crawler once and check the states and collection it leaves behind.

        Asserts that no crawler state exists before the run, that two states
        exist afterwards, that the second state's metadata mentions 'kitty'
        in its title and 'demo.pdf' in its source path, and that the
        collection with foreign id 'test' holds exactly one document.
        """
        crawler = TDocumentCrawler()

        # Precondition: nothing has been recorded before the crawler runs.
        initial_count = CrawlerState.all().count()
        assert initial_count == 0, initial_count

        crawler.execute()

        recorded = CrawlerState.all().all()
        recorded_count = len(recorded)
        assert recorded_count == 2, recorded_count

        # NOTE(review): this picks the second row as returned by the query;
        # confirm the query order is stable enough to rely on.
        demo_state = recorded[1]
        assert 'kitty' in demo_state.meta['title'], demo_state.meta
        assert 'demo.pdf' in demo_state.meta['source_path'], demo_state.meta

        collection = Collection.by_foreign_id('test')
        assert collection is not None, collection
        documents = list(collection.documents)
        assert len(documents) == 1, documents
Esempio n. 3
0
 def test_incremental(self):
     """Execute the crawler twice, the second time with incremental=True.

     Asserts that three crawler states exist after both runs.
     """
     crawler = TDocumentCrawler()
     # First run uses the default arguments; the second passes incremental=True.
     crawler.execute()
     crawler.execute(incremental=True)

     state_count = len(CrawlerState.all().all())
     assert state_count == 3, state_count
Esempio n. 4
0
 def test_incremental(self):
     """Execute the crawler twice, the second time with incremental=True.

     Asserts that three crawler states exist after both runs.
     """
     crawler = TDocumentCrawler()
     # First run uses the default arguments; the second passes incremental=True.
     crawler.execute()
     crawler.execute(incremental=True)

     state_count = len(CrawlerState.all().all())
     assert state_count == 3, state_count