def trytry():
    """Run one hard-coded crawl end to end and then tally the results.

    Steps, in order: call ``PyMongoUtil.clean()`` and
    ``MemcacheUtil.clean()``, instantiate ``SpiderBloomFilter``, obtain the
    queue and lock from ``PyPool``, crawl the fixed start URL using a
    ``Regex`` mode, pass the lock and queue to ``MyListener.listen`` and
    finally call ``WordCount.calc_count()``.

    Takes no arguments and returns ``None``.
    """
    PyMongoUtil.clean()
    MemcacheUtil.clean()

    # The instance is discarded; presumably construction alone sets up the
    # shared filter state -- TODO confirm against SpiderBloomFilter.
    SpiderBloomFilter()

    queue = PyPool.get_queue()
    lock = PyPool.get_lock()
    listener = MyListener()

    # Raw string: the backslashes (\-, \., \|) are passed to Regex verbatim
    # instead of relying on unrecognised string-literal escapes. The pattern
    # reads as an e-mail-like token, optional ':' ',' '|' separators, then
    # the remainder of the text.
    email_dump_pattern = Regex(
        r"[a-z0-9\-\._]+@[a-z0-9\-\.]+\.[a-z]{2,4}[:,\|]*.*"
    )

    # NOTE(review): the positional ``2`` is presumably a crawl depth --
    # confirm against SpiderStrategy's signature.
    strategy = SpiderStrategy(
        "http://www.leakedin.com/tag/emailpassword-dump/",
        2,
        is_out=False,
        pattern=None,
        mode=email_dump_pattern,
    )

    Spider(strategy).get_all_words(queue, lock)
    # Argument order differs from get_all_words: listen takes (lock, queue).
    listener.listen(lock, queue)
    WordCount.calc_count()
from SpiderUtils.enums import Language from SpiderUtils.getWords import GetWords from PyIO.excelUtil import ExcelUtil from os import path logger = LogFactory.getlogger("main") # clean old data PyMongoUtil.clean() MemcacheUtil.clean() # create bloom filter SpiderBloomFilter() # multitask prepare queue = PyPool.get_queue() lock = PyPool.get_lock() listener = MyListener() def err(): print("please enter the right select") while True: url = raw_input("Please input url:\n") print("checking url...") if not url.startswith("http"): url = "http://" + url try: statusCode = GetWords.try_connect(url)
def __init__(self):
    """Store the pool returned by ``PyPool.get_pool()`` on the instance."""
    shared_pool = PyPool.get_pool()
    self.__pool = shared_pool