while threads and threads_counter <= 50:  # the crawl is still active
    # remove the stopped threads; iterate over a copy so that removal
    # does not skip entries in the live list
    for thread in threads[:]:
        if not thread.is_alive():
            threads.remove(thread)

    # launch a downloader thread for the current URL
    t2 = threading.Thread(target=downloads.downloaderware, args=(start_url,))
    t2.start()
    threads.append(t2)

    time.sleep(SLEEP_TIME)

    # launch a scheduler thread for the current URL
    t1 = threading.Thread(target=spider_schedule.SpiderSchedule, args=(start_url,))
    t1.start()
    threads.append(t1)

    # all threads have been processed; sleep briefly so the CPU can
    # focus execution on other threads
    proxy_counter = proxy_counter + 1
    if proxy_counter % 25 == 0:
        # every 25 iterations, drop proxies that have stopped working
        p_p.clean_nonworking()
    time.sleep(SLEEP_TIME)
    threads_counter = threads_counter + 1  # thread counter

# the crawl has finished, so clear the database
sp.deleteDatabase()
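The loop relies on names defined elsewhere in the project: the worker modules (downloads.downloaderware, spider_schedule.SpiderSchedule), the proxy pool p_p, the storage object sp, and the counters. A minimal harness sketch follows; the bodies and values are placeholder assumptions for illustration only, not the project's real implementations. Placed before the loop, it would let the snippet run standalone.

import threading
import time

SLEEP_TIME = 1  # seconds between thread launches (assumed value)
start_url = "http://example.com"  # seed URL (assumed)

# module stand-ins: the real project defines these elsewhere
class downloads:
    @staticmethod
    def downloaderware(url):
        pass  # placeholder: the real function fetches `url` via a proxy

class spider_schedule:
    @staticmethod
    def SpiderSchedule(url):
        pass  # placeholder: the real code schedules links found at `url`

class _ProxyPool:
    def clean_nonworking(self):
        pass  # placeholder: the real method drops failing proxies

class _Storage:
    def deleteDatabase(self):
        pass  # placeholder: the real method clears the crawl's tables

p_p = _ProxyPool()
sp = _Storage()

threads_counter = 0
proxy_counter = 0
# the loop only runs while `threads` is non-empty, so the project
# presumably seeds it before entering the loop
threads = [threading.Thread(target=downloads.downloaderware,
                            args=(start_url,))]
threads[0].start()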