Exemple #1
0
def run():
    """Start the subscribe server: spin up the grab, parse and mail-send
    worker pools, then join and tear them all down."""
    logging.info('start subscribe server.....')

    # Pool of page-fetch workers.
    grab_pool = ThreadPool(GRAB_NUM)
    for worker_id in range(1, GRAB_NUM + 1):
        grab_pool.add_task(do_grab, None, id=worker_id)

    # Pool of page-parse workers.
    paser_pool = ThreadPool(PASER_NUM)
    for worker_id in range(1, PASER_NUM + 1):
        paser_pool.add_task(do_paser, None, id=worker_id)

    # Pool of mail-sender workers.
    send_pool = ThreadPool(MAIL_SENDER_NUM)
    for worker_id in range(1, MAIL_SENDER_NUM + 1):
        send_pool.add_task(do_send, None, id=worker_id)

    # Join and destroy all threads.
    grab_pool.destroy()
    paser_pool.destroy()
    send_pool.destroy()
Exemple #2
0
    # set/get
    print r.set("key", 'b' * 56000)
    print len(r.get("key"))

    # incr
    print r.set("incr_key", 0)
    print r.get("incr_key")
    print r.incr('incr_key')
    print r.get("incr_key")


def press_test():
    r = redis.StrictRedis(host='localhost', port=8323)
    for i in range(10000):
        key = 'foo_%d' % i
        r.set(key, 'b' * i)
        if i % 1000 == 0:
            print key, "->", len(r.get(key))


if __name__ == "__main__":
    # Build a 32-worker thread pool and queue ten rounds of both tests.
    pool = ThreadPool(32)
    for _ in range(10):
        pool.add_task(functional_test)
        pool.add_task(press_test)
    # Join and destroy all threads.
    pool.destroy()
        for i in range(1, 10):
            try:
                imageUrls = get_image_url(restUrl)
                break
            except Exception, e:
                print 'get restUrl error times' + str(i) + ': %s' % (e,)
                logging.error('get restUrl error times' + str(i) + ': %s' % (e,))
                time.sleep(10)

        if imageUrls is None or len(imageUrls) == 0:
            print 'get imageUrls error %s' % restUrl
            logging.error('get imageUrls error %s' % restUrl)
            continue

        # logging.debug("progress: %d of %d, %s , %d images", progress, total, reviewName, len(urls))  # 进度

        count = 0
        for imageUrl in imageUrls:
            # download_image(imageUrl, LOCAL_DIR + reviewName,
            # imageUrl[imageUrl.rfind("/") + 1:] + ".jpg")
            pool.add_task(download_image, imageUrl, LOCAL_DIR + reviewName,
                          imageUrl[imageUrl.rfind("/") + 1:], SLEEP_SECONDS)  # 多线程下载图片
            count += 1
            # logging.debug("task added: %d", count)
            # logging.debug("finished : %s", reviewName)
            # print "finished : %s" % ( reviewName)
            #logging.info("finished : %s" % ( reviewName))
    pool.destroy()

def main_fresh(dbOrNot):
    """
    Monitor URLs using fresh data.

    Loads the previous criterion and access-error files, queues one monitor
    task per URL from ./.urgentURLS on a thread pool, mails summaries when
    access errors or content updates were detected, then rewrites the
    criterion and access-error files.

    dbOrNot -- when True, also push the new criterion to the database
               (currently overridden to False below -- see NOTE).
    """
    # Load the previous criterion into oldUrlObjDic.
    # Record format per line: "url,length,md5" -> URL(length, md5).
    with open("./urgentCriterion_new") as f:
        while 1:
            string = f.readline().strip()
            if not string:
                break
            arr = string.split(",")
            oldUrlObjDic[arr[0]] = URL(int(arr[1]), arr[2])

    # Load the URLs that failed with access errors on the previous run.
    with open("./urgentAccessErrorURLs") as f:
        while 1:
            string = f.readline().strip()
            if not string:
                break
            aeURLs.append(string)

    tp = ThreadPool(THREADS_NUM)

    urlCount = 0
    # Queue one monitor task per URL listed in the .urgentURLS file.
    with open("./.urgentURLS") as f:
        while 1:
            url = f.readline().strip()
            if not url:
                break
            tp.add_task(monitor, url)
            urlCount += 1

    # Join and destroy all worker threads before reporting results.
    tp.destroy()

    # Mail a summary when any site was unreachable.
    if aeCount > 0:
        allContent = "本次共监测网站{0}个, 其中有{1}个网站访问异常, 详细信息如下:\n\n{2}".format(urlCount, aeCount, aeContent)
        urgentMyUtils.sendEmail(aeSubject, allContent)
    # Mail a summary when any site content changed.
    if uwCount > 0:
        allContent = "本次共监测网站{0}个, 其中有{1}个网站监测到有更新, 详细信息如下:\n\n{2}".format(urlCount, uwCount, uwContent)
        urgentMyUtils.sendEmail(uwSubject, allContent)

    # Rewrite the criterion file from the freshly collected data.
    with open("./urgentCriterion_new", "w") as f:
        for url in newUrlObjDic.keys():
            f.write("{0},{1},{2}\n".format(url, newUrlObjDic[url].length, newUrlObjDic[url].getMD5Str()))

    # NOTE(review): this override makes the dbOrNot parameter dead code --
    # the database update below can never run.  Confirm whether this is a
    # deliberate kill-switch before removing it.
    dbOrNot = False
    if dbOrNot:
        # Update criterion in database.
        urgentMyUtils.updateCriterion(newUrlObjDic)

    # Rewrite the access-error URL file.
    with open("./urgentAccessErrorURLs", "w") as f:
        for url in aeURLs:
            f.write(url + "\n")
Exemple #5
0
class Tasks(Codes):
    """Bot task layer: admin/group bookkeeping, incoming-message analysis
    and asynchronous reply dispatch through a small worker thread pool."""

    def __init__(self):
        # Persistence helper, chat-API client, shared HTTP client singleton
        # and a small pool for asynchronous send/reply tasks.
        self.operate = Operate()
        self._api = OpenApi()
        self._http = HttpClient.getInstance()
        self._pool = ThreadPool(5)    # initialize a 5-thread worker pool
        print("Task Class 初始化完毕")

    def getAllAdmin(self):
        """Return the list of all admins (variable.Admins)."""
        print("所有管理员: %s", variable.Admins)
        return variable.Admins

    def getAllGroup(self):
        """Return the list of all watched groups (variable.Groups)."""
        print("所有关注群: %s", variable.Groups)
        return variable.Groups

    def addAdmin(self, qq):
        """Register qq as an admin; delegates to Operate."""
        return self.operate.addAdmin(qq)

    def delAdmin(self, qq):
        """Remove qq from the admins; delegates to Operate."""
        return self.operate.delAdmin(qq)

    def isAdmin(self, qq):
        """Return whether qq is an admin; delegates to Operate."""
        return self.operate.isAdmin(qq)

    def addGroup(self, qq):
        """Add a group number to the watched set; delegates to Operate."""
        return self.operate.addGroup(qq)

    def delGroup(self, qq):
        """Remove a group number from the watched set; delegates to Operate."""
        return self.operate.delGroup(qq)

    def inGroup(self, qq):
        """Return whether qq is a watched group; delegates to Operate."""
        # print("inGroup: %s", qq)
        return self.operate.inGroup(qq)

    def addAsk(self, question, answer):
        """Store a canned question/answer pair; delegates to Operate."""
        return self.operate.addAsk(question, answer)

    def delAsk(self, Id):
        """Delete the canned answer with the given id; delegates to Operate."""
        return self.operate.delAsk(Id)

    def getAsk(self, content):
        """Look up a canned answer for content; delegates to Operate."""
        return self.operate.getAsk(content)

    def end(self):
        """Shut down the worker pool (joins and destroys its threads)."""
        self._pool.destroy()

    def uin_to_qq(self, uin):
        """Resolve a uin to its qq number string.

        Checks the in-memory cache variable.UsersQQ first, otherwise queries
        the web API and caches the answer.  Returns "" on API error or parse
        failure.
        NOTE(review): if the API succeeds but reports a falsy account, the
        method falls through and implicitly returns None -- confirm callers
        accept that.
        """
        if uin in variable.UsersQQ:
            return variable.UsersQQ.get(uin)
        print("获取qq %s %s %s", uin, variable.Vfwebqq, variable.Referer)
        html = self._http.get(variable.Get_friend_uin2.format(uin, self.bytesToStr(variable.Vfwebqq)), referer = variable.Referer)
        print("uin_to_qq: %s", html)
        try:
            result = json.loads(self.bytesToStr(html))
            if result.get("retcode") != 0:
                return ""
            qq = result.get("result").get("account")
            if qq:
                # Cache the resolved qq for future lookups.
                variable.UsersQQ[uin] = str(qq)
                return str(qq)
        except Exception as e:
            print(e)
            return ""

    def sendMsg(self, *args, **kwargs):
        """Worker-pool task: POST a reply message.

        Expected kwargs: url, data (POST payload), referer.
        """
        print("回复消息")
        url = kwargs.get("url")
        data = kwargs.get("data")
        # print(data)
        referer = kwargs.get("referer")
        result = self._http.post(url = url, data = data, referer = referer)
        print("回复结果: %s", result)

    def otherMsg(self, content, to, url, uin):
        """Worker-pool task: ask the chat-robot API for a reply to content
        and queue the answer back through sendMsg."""
        if content:
            html = self._http.get(url = variable.RobotUrl.format(quote(content), uin))
            html = html.replace("\\n", "").replace("\n", "")
            html = self._api.parse(html)
            html = self._api.getResult()
            if html:
                print("智能回复: ", html)
                data = {'r' : variable.Msg_Data.format(to, uin, html, variable.Clientid, variable.Msgid, variable.Psessionid)}
                print(data)
                self._pool.add_task(callback = self.sendMsg, url = url, data = data, referer = variable.Referer)

    def analyze(self, qq, uin, content, iseq = None):
        """Parse one incoming message and dispatch the proper reaction.

        qq      -- sender's qq number
        uin     -- uin to reply to
        content -- message text
        iseq    -- group number when the message came from a group
        """
        print("开始解析消息")
        if iseq:
            print("消息来自群")
            to = "group_uin"
            url = variable.Send_qun_msg2
        else:
            print("消息来自好友")
            to = "to"
            url = variable.Send_buddy_msg2
        # Admin control commands (enable / disable / quit the bot).
        if self.isAdmin(qq) and content in ("开启机器人", "关闭机器人", "退出"):
            # Parse the admin command.
            _msg = ""
            print("是管理员消息")
            if content == "开启机器人":
                variable.State = True
                print("机器人已开启")
                _msg = "机器人已开启"
            elif content == "关闭机器人":
                variable.State = False
                print("机器人已关闭")
                _msg = "机器人已关闭"
            elif content == "退出":
                variable.State = False
                variable.Exit = True
                print("机器人已退出")
                _msg = "机器人已退出"
            if _msg:
                data = {'r' : variable.Msg_Data.format(to, uin, _msg, variable.Clientid, variable.Msgid, variable.Psessionid)}
                self._pool.add_task(callback = self.sendMsg, url = url, data = data, referer = variable.Referer)
            return
        # Private message to me: check whether it is a management command.
        result = variable.Command.findall(content)
        if result and to == "to":
            ver, msg = result[0]
            _msg = ""
            if ver == variable.AddAdmin:
                # Add an admin.
                print("添加管理员")
                if self.addAdmin(msg):
                    _msg = "添加管理员: " + msg + " 成功"
                else:
                    _msg = "添加管理员: " + msg + " 失败"
            elif ver == variable.DelAdmin:
                # Delete an admin.
                print("删除管理员")
                if self.delAdmin(msg):
                    _msg = "删除管理员: " + msg + " 成功"
                else:
                    _msg = "删除管理员: " + msg + " 失败"
            elif ver == variable.AddAttention:
                # Add a watched group number.
                print("添加关注群")
                if self.addGroup(msg):
                    _msg = "添加关注群: " + msg + " 成功"
                else:
                    _msg = "添加关注群: " + msg + " 失败"
            elif ver == variable.DelAttention:
                # Delete a watched group number.
                print("删除关注群号")
                if self.delGroup(msg):
                    _msg = "删除关注群: " + msg + " 成功"
                else:
                    _msg = "删除关注群: " + msg + " 失败"
            if _msg:
                data = {'r' : variable.Msg_Data.format(to, uin, _msg, variable.Clientid, variable.Msgid, variable.Psessionid)}
                self._pool.add_task(callback = self.sendMsg, url = url, data = data, referer = variable.Referer)
            return
        if content.startswith("#") and len(content) > 2 and self.inGroup(iseq):
            # Message from a watched group starting with '#' and longer than
            # 2 characters: route it to the chat robot unless it contains a
            # filtered word.
            i = 0
            content = content[1:].strip()
            for w in content:
                if w in variable.Filter:
                    print("发现过滤词: ", w)
                    i += 1
            if i == 0:
                self._pool.add_task(callback = self.otherMsg, content = content, to = to, url = url, uin = uin)
                return
            else:
                data = {'r' : variable.Msg_Data.format(to, uin, "你想干什么!f**k", variable.Clientid, variable.Msgid, variable.Psessionid)}
                self._pool.add_task(callback = self.sendMsg, url = url, data = data, referer = variable.Referer)
                return
        if to == "to" and len(content) > 2:
            # Plain private message: route it to the chat robot.
            self._pool.add_task(callback = self.otherMsg, content = content.strip(), to = to, url = url, uin = uin)
            return

    def delwith(self, fuin, suin, iseq, content):
        '''
        Entry point for one raw incoming message.

        fuin    -- uin of the message sender (friend, or the group itself)
        suin    -- uin of the group member who sent a group message
        iseq    -- group number (falsy for private messages)
        content -- message body
        '''
        print("%s %s %s %s", fuin, suin, iseq, content)
        # Group shared-file notifications arrive as XML payloads.
        if content.startswith("<?xml"):
            print("发现共享文件")
        # Resolve the actual sender's qq number.
        if iseq and suin:
            qq = self.uin_to_qq(suin)
        else:
            qq = self.uin_to_qq(fuin)
        print("qq: %s", qq)
        self.analyze(qq, fuin, content, iseq)
Exemple #6
0
 def lagouScrapy(self):
     """Queue 200 lagou scraping tasks on an 18-worker pool and wait for
     them all to finish."""
     tp = ThreadPool(18)
     # The original `i += 1` inside this loop was dead code: the for-loop
     # rebinds the index on every iteration, so the increment had no effect.
     for page in range(200):
         tp.add_task(self.lagou, page)
     tp.destroy()
Exemple #7
0
def start(baseUrl,seedUrl):
    # clean reffer in reffer.txt
    f = open("reffer.txt","w")
    f.close()

    #seed = Request(base='http://192.168.42.131/dvwa/index.php',url='http://192.168.42.131/dvwa/index.php',method='get')
    seed = request.Request(base=baseUrl,url=seedUrl,timeout=config.conf['connTimeout'],query={},method='get')
    #seed = request.Request(base='http://192.168.42.132/dvwa/',url='http://192.168.42.132/dvwa/',query={},method='get')
    colors.blue( '种子URL: %s\n'%seed._url)
    logfileName = create_logfile(seed._url)
    cookie = getCookie(seed._url)
    
    # begin crawler
    tup = urlparse.urlparse(seed._url)
    netloc = tup.netloc # seed url 
    count = 0
    q = Queue.Queue()
    bf = bloomFilter.BloomFilter(0.001,100000)
    # readreffer from reffer.txt
    '''
    reffer = readReffer()
    reqSet = []
    reqSet.append(seed)
    reqSet.extend(reffer)
    for i in reqSet:
        q.put(i)
        bf.insert(i._url)
    '''
    q.put(seed)
    bf.insert(seed._url)

    nums = config.conf['MaxThread']
    pool = ThreadPool(nums)
    begin = time.time()
    while(not q.empty()):
        req = q.get()
        req._cookies = cookie
        reqs = crawler.crawl(req,tree)

        if req._query != {} and is_tree_full(req._url,tree):
        #if req._query != {}:
            count += 1 
            print 'URL: ',req._BFUrl,'  ', req._source
            pool.add_task(startCheck,req,logfileName)
        

        for x in reqs:
            if not bf.exist(x._BFUrl):
                bf.insert(x._BFUrl)
                q.put(x)


    pool.destroy()
    end = time.time()
    
    f = open(logfileName,'r')
    colors.blue('\n扫描结果:\n\n')
    x  = f.read()
    colors.green(x)
    colors.blue('\n扫描结果已保存在 "%s"\n\n'%(os.getcwd()+'/'+logfileName)+' 中')
    cost = end - begin 
    print "耗时:%f秒"%cost
    print "进行测试的URL数量:",count
    f.close()
    f = open(logfileName,'a')
    f.write(advice())
    f.close()
    os.system('ps -ef | grep -v grep | grep proxy.py | awk \'{print $2}\'|xargs kill -9')
    '''