Ejemplo n.º 1
0
def crawldir(directory, language=None, country=None):
    """Run a ``DirectoryCrawler`` over ``directory``.

    ``language`` and ``country`` are optional; each one that is not
    ``None`` is wrapped in a single-item list and passed to the crawler
    under the ``'languages'`` / ``'countries'`` key of ``meta``.
    """
    log.info('Crawling %r...', directory)
    optional_fields = (('languages', language), ('countries', country))
    meta = {
        key: [value]
        for key, value in optional_fields
        if value is not None
    }
    DirectoryCrawler().execute(directory=directory, meta=meta)
Ejemplo n.º 2
0
def crawldir(directory, source=None, language=None, country=None):
    """Run a ``DirectoryCrawler`` over the absolute form of ``directory``.

    The path is made absolute and normalised before it is logged and
    crawled. ``source`` is forwarded to the crawler unchanged. Each of
    ``language`` and ``country`` that is not ``None`` is passed along as
    a single-item list under ``meta['languages']`` / ``meta['countries']``.
    """
    directory = os.path.normpath(os.path.abspath(directory))
    log.info('Crawling %r...', directory)

    meta = {}
    for key, value in (('languages', language), ('countries', country)):
        if value is not None:
            meta[key] = [value]

    DirectoryCrawler().execute(directory=directory, source=source, meta=meta)
Ejemplo n.º 3
0
def crawldir(directory, source=None, language=None, country=None):
    """Crawl ``directory`` after resolving it to a normalised absolute path.

    Builds a ``meta`` dict holding ``[language]`` under ``'languages'``
    and ``[country]`` under ``'countries'`` (each only when the argument
    is not ``None``), then calls ``DirectoryCrawler.execute`` with the
    resolved directory, ``source`` and ``meta``.
    """
    absolute = os.path.abspath(directory)
    directory = os.path.normpath(absolute)
    log.info('Crawling %r...', directory)
    optional = {'languages': language, 'countries': country}
    meta = {
        key: [value]
        for key, value in optional.items()
        if value is not None
    }
    crawler = DirectoryCrawler()
    crawler.execute(directory=directory, source=source, meta=meta)
Ejemplo n.º 4
0
    def test_load_csv_file(self):
        """Crawl the ``experts.csv`` fixture and check the stored records."""
        fixture = self.get_fixture_path('experts.csv')
        DirectoryCrawler().execute(directory=fixture)

        # The single CSV file must yield exactly one document.
        assert Document.all().count() == 1, Document.all().count()

        rows = db.session.query(DocumentRecord).all()
        assert len(rows) == 14, len(rows)

        first = rows[0]
        assert str(first.id) in repr(first), repr(first)
        document = first.document
        assert 'experts.csv' in document.meta.file_name, \
            document.meta
        for column in ('nationality', 'name'):
            assert column in first.data, first.data

        assert 'experts' in repr(document)

        # After deleting the document's records no DocumentRecord may remain.
        document.delete_records()
        remaining = db.session.query(DocumentRecord).all()
        assert len(remaining) == 0, len(remaining)
Ejemplo n.º 5
0
    def test_load_csv_file(self):
        """Load ``experts.csv`` through the crawler and verify its records.

        Checks the document count, the record count, the repr of the first
        record and its document, the presence of the ``nationality`` and
        ``name`` fields, and that ``delete_records`` removes every record.
        """
        def fetch_records():
            # Every DocumentRecord currently visible to the session.
            return db.session.query(DocumentRecord).all()

        path = self.get_fixture_path('experts.csv')
        importer = DirectoryCrawler()
        importer.execute(directory=path)
        assert Document.all().count() == 1, Document.all().count()

        stored = fetch_records()
        stored_total = len(stored)
        assert stored_total == 14, stored_total

        head = stored[0]
        assert str(head.id) in repr(head), repr(head)
        assert 'experts.csv' in head.document.meta.file_name, \
            head.document.meta
        assert 'nationality' in head.data, head.data
        assert 'name' in head.data, head.data

        parent = head.document
        assert 'experts' in repr(parent)

        parent.delete_records()
        leftover_total = len(fetch_records())
        assert leftover_total == 0, leftover_total
Ejemplo n.º 6
0
 def test_load_sample_directory(self):
     """Run the directory crawler over the ``samples`` fixture path.

     NOTE(review): no assertions are visible after the crawl in this
     view; as shown, the test only verifies that crawling does not
     raise. Confirm whether that is intended.
     """
     # NOTE(review): despite its name, ``csv_path`` holds the path of the
     # 'samples' fixture, presumably a directory rather than a CSV file.
     csv_path = self.get_fixture_path('samples')
     crawler = DirectoryCrawler()
     crawler.execute(directory=csv_path)