Beispiel #1
0
    def crawl(self, list_domains):
        """Crawl every domain of a comma-separated list, one thread per domain.

        A ``Record`` worker is created with ``self.db_value`` and started
        first; it shares ``lock`` with the crawlers. Then one
        ``CrawlerThread`` is started per domain. The call returns once every
        crawler thread has finished.

        Args:
            list_domains: Comma-separated domain names, e.g.
                ``"example.com,example.org"``. Whitespace around each entry
                is stripped and empty entries are skipped.

        Note:
            Assumes ``CrawlerThread`` is a ``threading.Thread`` subclass
            (``start``/``join`` are used on it) -- TODO confirm.
            The ``Record`` thread is started but not joined here; how it is
            shut down is not visible from this method.
        """
        batch_size = 5     # crawlers launched between two throttling pauses
        batch_timeout = 2  # seconds granted to each crawler during a pause

        # NOTE: an earlier revision read the domains from the database
        # (self.db.new_domaines.distinct('domaine')) instead of the argument.
        domains = [d.strip() for d in list_domains.split(',') if d.strip()]

        lock = threading.Lock()
        rec = Record(self.db_value, lock)
        rec.start()

        threadpool = []
        for count, domain in enumerate(domains, 1):
            cw = CrawlerThread(domain, self.db, lock)
            # start(), not run(): calling run() directly executes the crawl
            # synchronously in the calling thread and never spawns a thread.
            cw.start()
            threadpool.append(cw)

            # Throttle: after each batch, give the running crawlers a bounded
            # amount of time to finish before launching more. Only crawler
            # threads are waited on, not unrelated threads of the process.
            if count % batch_size == 0:
                for t in threadpool:
                    t.join(batch_timeout)

        # Block until every crawler is done. join() replaces the previous
        # polling loop, which spun forever on an always-empty pool.
        for t in threadpool:
            t.join()