Beispiel #1
0
def trytry():
    """Run one hard-coded crawl: clean up, crawl, listen, then count words.

    Takes no arguments and returns ``None``. The start URL, the numeric
    second argument to ``SpiderStrategy`` and the matching pattern are all
    fixed inside this function.
    """
    # Start from a clean state in both helpers.
    PyMongoUtil.clean()
    MemcacheUtil.clean()
    # The instance is discarded; only the constructor call matters here
    # (presumably it sets up shared filter state -- TODO confirm).
    SpiderBloomFilter()

    queue = PyPool.get_queue()
    lock = PyPool.get_lock()
    listener = MyListener()

    # Raw string so the regex backslashes (\- \. \|) reach Regex unchanged
    # without relying on Python passing through invalid string escapes.
    # The pattern is shaped like "user@host.tld" followed by optional
    # ':', ',' or '|' separators and the remainder of the text.
    email_regex = Regex(r"[a-z0-9\-\._]+@[a-z0-9\-\.]+\.[a-z]{2,4}[:,\|]*.*")
    # NOTE(review): the positional 2 looks like a crawl depth -- confirm
    # against SpiderStrategy's signature.
    strategy = SpiderStrategy(
        "http://www.leakedin.com/tag/emailpassword-dump/",
        2,
        is_out=False,
        pattern=None,
        mode=email_regex,
    )

    Spider(strategy).get_all_words(queue, lock)
    # Note the argument order differs from get_all_words: (lock, queue).
    listener.listen(lock, queue)
    WordCount.calc_count()
Beispiel #2
0
def trytry():
    """Execute a single fixed crawl and word count.

    Steps, in order: call ``clean()`` on ``PyMongoUtil`` and
    ``MemcacheUtil``, construct a ``SpiderBloomFilter``, obtain the queue
    and lock from ``PyPool``, crawl the hard-coded URL with a regex-based
    strategy, let the listener process the queue, and finally call
    ``WordCount.calc_count()``. Returns ``None``.
    """
    PyMongoUtil.clean()
    MemcacheUtil.clean()
    # Constructed for its side effects only; the object itself is not kept
    # (presumably a shared/singleton filter -- TODO confirm).
    SpiderBloomFilter()
    queue = PyPool.get_queue()
    lock = PyPool.get_lock()
    listener = MyListener()
    # Use a raw string literal: the pattern contains \-, \. and \| which
    # are not valid Python string escapes and trigger warnings in a plain
    # literal. The runtime value is identical to the original.
    matcher = Regex(r"[a-z0-9\-\._]+@[a-z0-9\-\.]+\.[a-z]{2,4}[:,\|]*.*")
    # NOTE(review): meaning of the positional 2 is not visible here
    # (likely a depth limit) -- verify against SpiderStrategy.
    strategy = SpiderStrategy("http://www.leakedin.com/tag/emailpassword-dump/",
                              2,
                              is_out=False,
                              pattern=None,
                              mode=matcher)
    Spider(strategy).get_all_words(queue, lock)
    # listen() takes the lock first and the queue second.
    listener.listen(lock, queue)
    WordCount.calc_count()
Beispiel #3
0
from SpiderUtils.enums import Language
from SpiderUtils.getWords import GetWords
from PyIO.excelUtil import ExcelUtil
from os import path

# Module-level logger obtained from LogFactory under the name "main".
logger = LogFactory.getlogger("main")

# Clean old data: both clean() calls run at import time, before any crawl
# is started.
PyMongoUtil.clean()
MemcacheUtil.clean()

# Create the bloom filter. The instance is not kept, so this call matters
# only for what the constructor does (presumably shared setup -- TODO confirm).
SpiderBloomFilter()

# Multitask preparation: fetch the queue and the lock from PyPool and
# create the listener used later in the script.
queue = PyPool.get_queue()
lock = PyPool.get_lock()
listener = MyListener()


def err():
    """Print the fixed "invalid selection" prompt to standard output."""
    message = "please enter the right select"
    print(message)


while True:
    url = raw_input("Please input url:\n")
    print("checking url...")
    if not url.startswith("http"):
        url = "http://" + url
    try:
        statusCode = GetWords.try_connect(url)
Beispiel #4
0
 def __init__(self):
     """Store the pool returned by ``PyPool.get_pool()`` on the instance."""
     # Kept in a name-mangled (double-underscore) attribute, so it is
     # private to the enclosing class.
     pool = PyPool.get_pool()
     self.__pool = pool
Beispiel #5
0
 def __init__(self):
     """Fetch the pool from ``PyPool`` and keep it as a private attribute."""
     shared_pool = PyPool.get_pool()
     # Double-underscore name: mangled per class, not meant for outside use.
     self.__pool = shared_pool