Exemple #1
0
def main():
    """Start the "userbot" client and crawl ``https://t.me/BonOgood``.

    Builds a ``Storage`` for the "crawlerDB" database and a ``Crawler``
    limited to depth 2 with a single ``Worker``, then creates and runs the
    workers. The started client and a URL message filter are passed to the
    crawler as the extra keyword arguments ``userbot`` and
    ``messages_filter`` (presumably handed on to each worker -- confirm
    against ``Crawler``).
    """
    start_url = "https://t.me/BonOgood"
    max_depth = 2

    client = Client("userbot")
    url_filter = InputMessagesFilterUrl()

    # The client is started before the storage and the crawler are built.
    client.start()

    storage = Storage(
        base_url=start_url,
        db_name="crawlerDB",
        username="******",
        password="******",
        max_depth=max_depth,
    )

    crawler = Crawler(
        base_url=start_url,
        depth=max_depth,
        storage=storage,
        worker_class=Worker,
        workers_number=1,
        userbot=client,
        messages_filter=url_filter,
    )
    crawler.create_workers()
    crawler.run_workers()
Exemple #2
0
def main():
    """Crawl ``kiryat4.org.il`` to depth 5 with five workers.

    The crawler is given the ``Storage`` class (not an instance) together
    with the database name and credentials, so it is presumably responsible
    for constructing the storage itself -- confirm against ``Crawler``.
    After construction the workers are created and then run.
    """
    site = 'kiryat4.org.il'
    database = 'crawlerDB'
    crawl_depth = 5
    worker_count = 5

    crawler = Crawler(
        base_url=site,
        db_name=database,
        depth=crawl_depth,
        storage_class=Storage,
        worker_class=Worker,
        workers_number=worker_count,
        username='******',
        password="******",
    )
    crawler.create_workers()
    crawler.run_workers()
Exemple #3
0
def main():
    """Crawl ``en.wikipedia.org/wiki/Main_Page`` to depth 2 with two workers.

    A ``Storage`` backed by "storage.db" is built first and handed to the
    ``Crawler``. The workers are then created and run, and finally
    ``crawler.idle()`` is called.
    """
    start_page = "en.wikipedia.org/wiki/Main_Page"
    max_depth = 2

    storage = Storage(
        db_name="storage.db",
        base_url=start_page,
        max_depth=max_depth,
    )

    crawler = Crawler(base_url=start_page,
                      depth=max_depth,
                      storage=storage,
                      worker_class=Worker,
                      workers_number=2)

    crawler.create_workers()
    crawler.run_workers()
    crawler.idle()