def trytry():
    """Run one crawl with hard-coded settings and then compute word counts.

    Takes no arguments and returns ``None``. The steps, in order:

    1. Call ``PyMongoUtil.clean()`` and ``MemcacheUtil.clean()``, then
       instantiate ``SpiderBloomFilter`` (the instance is discarded).
    2. Obtain a queue and a lock from ``PyPool`` and create a ``MyListener``.
    3. Build a ``SpiderStrategy`` for a fixed URL with ``2`` as the second
       argument, ``is_out=False``, ``pattern=None`` and a ``Regex`` mode.
    4. Run ``Spider(...).get_all_words`` with the queue and lock, hand the
       lock and queue to the listener, and call ``WordCount.calc_count()``.
    """
    PyMongoUtil.clean()
    MemcacheUtil.clean()
    # NOTE(review): the instance is not kept; presumably constructed only for
    # its side effects (e.g. resetting a shared filter) -- confirm.
    SpiderBloomFilter()

    work_queue = PyPool.get_queue()
    work_lock = PyPool.get_lock()
    word_listener = MyListener()

    # The pattern is intentionally left as a plain (non-raw) literal: none of
    # the backslash sequences in it are recognized escapes, so the backslashes
    # reach Regex unchanged.
    strategy = SpiderStrategy(
        "http://www.leakedin.com/tag/emailpassword-dump/",
        2,
        is_out=False,
        pattern=None,
        mode=Regex("[a-z0-9\-\._]+@[a-z0-9\-\.]+\.[a-z]{2,4}[:,\|]*.*"),
    )

    crawler = Spider(strategy)
    crawler.get_all_words(work_queue, work_lock)

    # Argument order differs from get_all_words: listen takes (lock, queue).
    word_listener.listen(work_lock, work_queue)
    WordCount.calc_count()
if not md.isdigit() or int(md) <= 0 or int(md) >= 5: err() continue else: md = int(md) if md == 4: regex = raw_input("please enter regex string:\n") mode = Regex(regex) else: mode = Language.get_enum(md) break s = SpiderStrategy(url, int(depth), isOut, None, mode) Spider(s).get_all_words(queue, lock) listener.listen(lock, queue) WordCount.calc_count() while True: query_count = raw_input("Spider Finished. please enter how many statistics you want.[All]\n") file_path = raw_input("Please input the path you want to put result.csv.\n") if not path.isdir(file_path): print("Please enter the right path.\n") continue if query_count == None or query_count == '': ExcelUtil.writefile(file_path, PyMongoUtil.query_result()) elif not query_count.isdigit(): print("Please enter the right query count.\n") continue else: ExcelUtil.writefile(file_path, PyMongoUtil.query_result(int(query_count))) print("Generate success. Please check the result! Bye\n")