Example #1
0
    def __validProxy(self):
        """
        Re-validate every proxy in the useful table once a minute, forever.

        A proxy that passes gets its counter incremented only while the stored
        counter is truthy and below 1. A proxy that fails is deleted once its
        counter is below -5; otherwise its counter is decremented.
        :return: never returns
        """
        while True:
            self.db.changeTable(self.useful_proxy_queue)
            for candidate in self.db.getAll():
                # Py3 compatibility: entries may come back as bytes.
                proxy = candidate.decode('utf-8') if isinstance(candidate, bytes) else candidate

                # The counter is read before validation, as in the original flow.
                score = self.db.getvalue(proxy)
                if not validUsefulProxy(proxy):
                    if score and int(score) < -5:
                        # Counter already below -5: drop the proxy.
                        self.db.delete(proxy)
                    else:
                        # Failure: decrement the counter.
                        self.db.inckey(proxy, -1)
                    self.log.info('ProxyValidSchedule: {} validation fail'.format(proxy))
                    continue

                # Success: bump the counter, capped by the "< 1" check.
                if score and int(score) < 1:
                    self.db.inckey(proxy, 1)
                self.log.info('ProxyValidSchedule: {} validation pass'.format(proxy))

            self.log.info('ProxyValidSchedule running normal')
            sleep(60)
Example #2
0
    def run(self):
        """
        Drain the useful-proxy table, re-validate each entry, then sleep five minutes.

        Popped items expose ``proxy`` and ``value`` (counter, default 1).
        A passing proxy is put back, with counter + 1 while the counter is
        truthy and below 1. A failing proxy is deleted once its counter is
        <= FAIL_COUNT, otherwise put back with counter - 1.
        :return: never returns
        """
        self.db.changeTable(self.useful_proxy_queue)
        while True:
            while True:
                item = self.db.pop()
                if not item:
                    # Table drained for this round.
                    break

                proxy = item.get('proxy')
                counter = item.get('value', 1)

                if validUsefulProxy(proxy):
                    # Validation passed: raise the counter while it is below 1.
                    if counter and int(counter) < 1:
                        self.db.put(proxy, num=int(counter) + 1)
                    else:
                        self.db.put(proxy)
                    self.log.info('ProxyCheck: {} validation pass'.format(proxy))
                    continue

                self.log.info('ProxyCheck: {} validation fail'.format(proxy))
                # Validation failed: delete at the threshold, else lower the counter.
                if counter and int(counter) <= FAIL_COUNT:
                    self.log.info('ProxyCheck: {} fail too many, delete!'.format(proxy))
                    self.db.delete(proxy)
                else:
                    self.db.put(proxy, num=int(counter) - 1)

            sleep(300)
    def validProxy(self):
        """
        Validate proxies from raw_proxy_queue and store working ones in useful_proxy_queue.

        Entries are popped from the raw table one at a time as
        ``(proxy, value)`` pairs. For each proxy that passes
        ``validUsefulProxy`` the accompanying ``value`` is written to the
        useful table. The pass is best-effort: any exception ends it, and the
        exception is logged instead of being silently discarded.
        :return: None
        """
        self.db.changeTable(self.raw_proxy_queue)
        try:
            raw_proxy, value = self.db.pop()
            self.log.info('ProxyRefreshSchedule: %s 爬取ip 检验 start' %
                          time.ctime())
            # The loop condition also covers a None proxy, so no extra break is needed.
            while raw_proxy:
                if validUsefulProxy(raw_proxy):
                    self.db.changeTable(self.useful_proxy_queue)
                    # NOTE(review): the popped value, not the proxy itself, is
                    # what gets stored - confirm against the db client.
                    self.db.put(value)
                    self.log.info('ProxyRefreshSchedule: %s 爬取ip 检验 pass' %
                                  raw_proxy)
                else:
                    self.log.info('ProxyRefreshSchedule: %s 爬取ip 检验 fail' %
                                  raw_proxy)

                # Switch back to the raw table before fetching the next entry.
                self.db.changeTable(self.raw_proxy_queue)
                raw_proxy, value = self.db.pop()
            self.log.info('ProxyRefreshSchedule: %s 爬取ip 检验 complete' %
                          time.ctime())
        except Exception as e:
            # Still best-effort (no re-raise), but no longer silent. Logged at
            # info level because an exhausted queue may surface here when the
            # pop() result cannot be unpacked - NOTE(review): confirm.
            self.log.info('ProxyRefreshSchedule: validProxy stopped early: %r' % (e,))
Example #4
0
    def run(self):
        """
        Re-validate the useful-proxy table in rounds separated by a five-minute sleep.

        Each popped entry provides ``proxy`` and ``value`` (counter). On a pass
        the proxy is put back, with counter + 1 while the counter is truthy and
        below 1. On a failure it is deleted (with a warning) once the counter is
        <= -FAIL_COUNT, otherwise put back with counter - 1.
        :return: never returns
        """
        self.db.changeTable(self.useful_proxy_queue)
        while True:
            entry = self.db.pop()
            while entry:
                proxy, counter = entry.get('proxy'), entry.get('value')
                alive = validUsefulProxy(proxy)
                if not alive:
                    # Failed: delete at the threshold, else lower the counter.
                    if counter and int(counter) <= -FAIL_COUNT:
                        self.log.warning(
                            'ProxyCheck: {} fail too many, delete!'.format(
                                proxy))
                        self.db.delete(proxy)
                    else:
                        self.db.put(proxy, num=int(counter) - 1)
                elif counter and int(counter) < 1:
                    # Passed with a counter below 1: raise it by one.
                    self.db.put(proxy, num=int(counter) + 1)
                else:
                    self.db.put(proxy)

                entry = self.db.pop()
            sleep(300)
Example #5
0
    def run(self):
        """
        Validate the proxies waiting in ``self.queue`` and record statistics.

        ``validUsefulProxy`` returns a pair; only its first element (the HTTP
        result) decides pass or fail here. A success, failure and total tick
        is recorded per proxy through the ``tickUsefulProxyVaild*`` helpers,
        and a per-thread summary is logged when the queue is empty.
        :return: None
        """
        self.db.changeTable(self.useful_proxy_queue)
        # current_thread() replaces the deprecated currentThread() alias.
        thread_id = threading.current_thread().ident
        log.info("thread_id:{thread_id} useful_proxy proxy check start".format(
            thread_id=thread_id))

        total = 0
        succ = 0
        fail = 0
        while self.queue.qsize():
            proxy = self.queue.get()
            http_result, _ = validUsefulProxy(proxy)
            if http_result:
                self.tickUsefulProxyVaildSucc(proxy)
                succ += 1
                log.debug(
                    "ProxyCheck: {proxy} validation pass".format(proxy=proxy))
            else:
                self.tickUsefulProxyVaildFail(proxy)
                fail += 1
                log.debug(
                    "ProxyCheck: {proxy} validation fail".format(proxy=proxy))

            self.queue.task_done()
            total += 1
            self.tickUsefulProxyVaildTotal(proxy)

        log.info(
            'thread_id:{thread_id} proxy check end, total:{total}, succ:{succ}, fail:{fail}'
            .format(thread_id=thread_id, total=total, succ=succ, fail=fail))
Example #6
0
    def __validProxy(self):
        """
        Re-validate every proxy in the useful table in an endless loop.

        A pass increments the proxy's counter and a failure decrements it.
        After each check the counter is read back and the proxy is deleted
        once it has dropped below -5.
        :return: never returns
        """
        while True:
            self.db.changeTable(self.useful_proxy_queue)
            for each_proxy in self.db.getAll():
                if isinstance(each_proxy, bytes):
                    each_proxy = each_proxy.decode('utf-8')

                if validUsefulProxy(each_proxy):
                    # Success: counter + 1.
                    self.db.inckey(each_proxy, 1)
                    self.log.debug('validProxy_b: {} validation pass'.format(each_proxy))
                else:
                    # Failure: counter - 1.
                    self.db.inckey(each_proxy, -1)
                    self.log.info('validProxy_b: {} validation fail'.format(each_proxy))

                value = self.db.getvalue(each_proxy)
                # Coerce before comparing: a str/bytes counter compared with an
                # int raises TypeError on Python 3 and never matches on Python 2.
                if value and int(value) < -5:
                    # Counter below -5: delete the proxy.
                    self.db.delete(each_proxy)
        # NOTE(review): unreachable while the loop above has no exit. Left in
        # place because moving it into the loop would log on every cycle.
        self.log.info('validProxy_a running normal')
Example #7
0
    def run(self):
        """
        Re-validate the useful-proxy table in rounds, sleeping five minutes between rounds.

        Per the original author's note, this is meant to override ``run`` from
        ``threading`` so that the instance is started with ``.start()``.
        A passing proxy is put back (counter + 1 while the counter is truthy
        and below 1). A failing proxy is deleted once its counter is
        <= -FAIL_COUNT, otherwise put back with counter - 1.
        :return: never returns
        """
        self.db.changeTable(self.useful_proxy_queue)
        while True:
            # Keep popping until the useful_proxy_queue table yields nothing.
            while True:
                popped = self.db.pop()
                if not popped:
                    break

                proxy = popped.get('proxy')
                counter = popped.get('value')

                if validUsefulProxy(proxy):
                    # Passed: raise the counter while it is still below 1.
                    if counter and int(counter) < 1:
                        self.db.put(proxy, num=int(counter) + 1)
                    else:
                        self.db.put(proxy)
                    self.log.info(
                        'ProxyCheck: {} validation pass'.format(proxy))
                    continue

                self.log.info(
                    'ProxyCheck: {} validation fail'.format(proxy))
                # Failed: delete at the threshold, else lower the counter.
                if counter and int(counter) <= -FAIL_COUNT:
                    self.log.info(
                        'ProxyCheck: {} fail too many, delete!'.format(
                            proxy))
                    self.db.delete(proxy)
                else:
                    self.db.put(proxy, num=int(counter) - 1)

            sleep(300)
    def validProxy(self):
        """
        Move working proxies from raw_proxy_queue into useful_proxy_queue.

        Each popped item's ``proxy`` is accepted only when it is absent from
        the current ``self.getAll()`` result and passes ``validUsefulProxy``.
        The ``getAll()`` snapshot is refreshed after every pop.
        :return: None
        """
        self.db.changeTable(self.raw_proxy_queue)
        item = self.db.pop()
        self.log.info('ProxyRefreshSchedule: %s start validProxy' % time.ctime())
        # Proxies already known; used to skip validating duplicates.
        known = self.getAll()
        while item:
            candidate = item.get('proxy')
            if isinstance(candidate, bytes):
                # Py3 compatibility.
                candidate = candidate.decode('utf8')

            # Membership is tested first, so duplicates skip validation.
            accepted = (candidate not in known) and validUsefulProxy(candidate)
            if accepted:
                self.db.changeTable(self.useful_proxy_queue)
                self.db.put(candidate)
                self.log.info('ProxyRefreshSchedule: %s validation pass' % candidate)
            else:
                self.log.info('ProxyRefreshSchedule: %s validation fail' % candidate)

            self.db.changeTable(self.raw_proxy_queue)
            item = self.db.pop()
            known = self.getAll()
        self.log.info('ProxyRefreshSchedule: %s validProxy complete' % time.ctime())
    def validProxy(self):
        """
        Validate queued raw proxies and promote the working ones.

        Each proxy taken from ``self.queue`` is handled in one of three ways:

        * already in ``self.remaining_proxies``: removed from the raw store
          and logged as a repetition;
        * passes the HTTP check: saved as useful together with its HTTPS
          result, removed from the raw store and added to
          ``self.remaining_proxies``;
        * fails the HTTP check: a raw-proxy failure is ticked.

        A total tick is recorded for every proxy and a per-thread summary is
        logged at the end.
        :return: None
        """
        # current_thread() replaces the deprecated currentThread() alias.
        thread_id = threading.current_thread().ident
        log.info("thread_id:{thread_id}, Start ValidProxy `raw_proxy_queue`".format(thread_id=thread_id))

        total = 0
        succ = 0
        fail = 0

        while self.queue.qsize():
            proxy = self.queue.get()
            if proxy not in self.remaining_proxies:
                http_result, https_result = validUsefulProxy(proxy)
                if http_result:
                    self.saveUsefulProxy(proxy, https_result)
                    self.deleteRawProxy(proxy)
                    self.remaining_proxies.append(proxy)

                    succ += 1
                    # Logged only on success. It used to sit after the if/else,
                    # so failed proxies were reported as "pass" as well.
                    log.debug('ProxyRefreshSchedule: %s validation pass' % proxy)
                else:
                    self.tickRawProxyVaildFail(proxy)

                    fail += 1
                    log.debug('ProxyRefreshSchedule: %s validation fail' % proxy)
            else:
                self.deleteRawProxy(proxy)

                log.debug('ProxyRefreshSchedule: %s repetition, skip!' % proxy)

            self.queue.task_done()
            self.tickRawProxyVaildTotal(proxy)
            total += 1

        log.info('thread_id:{thread_id}, ValidProxy Complete `raw_proxy_queue`, total:{total}, succ:{succ}, fail:{fail}'.format(thread_id=thread_id, total=total, succ=succ, fail=fail))
Example #10
0
    def __validProxy(self):
        """
        Re-validate every proxy in the useful table in an endless loop.

        A pass increments the proxy's counter. A failure subtracts the current
        counter value when that value is >= 0, otherwise subtracts 1. After
        each check the counter is read back and the proxy is deleted when the
        counter is negative. Uses Python 2 print statements for debug output.
        :return: never returns (the loop has no exit)
        """
        # The factor 0 makes this a zero-second sleep regardless of random().
        time.sleep(60 * 0 * random.random())
        while True:
            self.db.changeTable(self.useful_proxy_queue)
            for each_proxy in self.db.getAll():
                if isinstance(each_proxy, bytes):
                    each_proxy = each_proxy.decode('utf-8')

                if validUsefulProxy(each_proxy) == True:
                    # Success: counter + 1
                    self.db.inckey(each_proxy, 1)
                    self.log.debug('validProxy_b: {} validation pass'.format(each_proxy))
                else:
                    # Failure: print the current value, then lower the counter
                    print "原有value  " + str(self.db.getvalue(each_proxy))
                    # A non-negative counter is decreased by its own value
                    # (presumably resetting it to zero - confirm inckey semantics);
                    # otherwise it is decreased by one.
                    # NOTE(review): the comparison is on the raw getvalue() result,
                    # without int(); confirm the type the db client returns.
                    if self.db.getvalue(each_proxy) >= 0:
                        self.db.inckey(each_proxy, -1*int(self.db.getvalue(each_proxy)))
                    else:
                        self.db.inckey(each_proxy, -1)
                    # self.db.delete(each_proxy)
                    self.log.info('validProxy_b: {} validation fail'.format(each_proxy))
                value = self.db.getvalue(each_proxy)
                print  value
                if None != value and int(value) < 0:
                    # Delete the proxy once its counter is negative
                    # (the original comment said "below -5"; the code checks < 0)
                    print "删除" + each_proxy
                    self.db.delete(each_proxy)
        # NOTE(review): unreachable - the while True loop above never breaks.
        self.log.info('validProxy_a running normal')
Example #11
0
    def run(self):
        """
        Check every proxy waiting in ``self.queue`` using ``self.mode``.

        ``self.item_dict[proxy]`` holds the proxy's failure count. A pass
        stores count - 1 when the count is positive. A failure deletes the
        proxy when count + 1 reaches FAIL_COUNT, otherwise stores count + 1.
        :return: None
        """
        self.db.changeTable(self.useful_proxy_queue)

        while self.queue.qsize():
            proxy = self.queue.get()
            fail_count = self.item_dict[proxy]

            if not validUsefulProxy(proxy, self.mode):
                self.log.info('Mode:{} ProxyCheck: {} validation fail'.format(
                    self.mode, proxy))
                if fail_count and int(fail_count) + 1 >= FAIL_COUNT:
                    self.log.info(
                        'Mode:{} ProxyCheck: {} fail too many, delete!'.format(
                            self.mode, proxy))
                    self.db.delete(proxy)
                else:
                    self.db.put(proxy, num=int(fail_count) + 1)
            else:
                # Validation passed: lower a positive counter by one.
                if fail_count and int(fail_count) > 0:
                    self.db.put(proxy, num=int(fail_count) - 1)
                self.log.info('Mode:{} ProxyCheck: {} validation pass'.format(
                    self.mode, proxy))

            self.queue.task_done()
Example #12
0
    def validProxy(self):
        """
        Validate the proxies in raw_proxy_queue and put the usable ones into useful_proxy_queue.

        A popped proxy is stored only when it is not in the latest
        ``self.getAll()`` result and ``validUsefulProxy`` accepts it. Anything
        else is logged as a validation failure.
        :return: None
        """
        self.db.changeTable(self.raw_proxy_queue)
        popped = self.db.pop()
        self.log.info('ProxyRefreshSchedule: %s start validProxy' % time.ctime())
        # Snapshot of existing proxies, refreshed after each pop below.
        existing = self.getAll()
        while True:
            if not popped:
                break

            proxy = popped.get('proxy')
            if isinstance(proxy, bytes):
                # Py3 compatibility.
                proxy = proxy.decode('utf8')

            if proxy in existing or not validUsefulProxy(proxy):
                self.log.info('ProxyRefreshSchedule: %s validation fail' % proxy)
            else:
                self.db.changeTable(self.useful_proxy_queue)
                self.db.put(proxy)
                self.log.info('ProxyRefreshSchedule: %s validation pass' % proxy)

            # Return to the raw table for the next pop.
            self.db.changeTable(self.raw_proxy_queue)
            popped = self.db.pop()
            existing = self.getAll()
        self.log.info('ProxyRefreshSchedule: %s validProxy complete' % time.ctime())
Example #13
0
 def run(self):
     """
     Poll the useful-proxy table forever, checking one popped record every 20 seconds.

     The record's ``ip`` and ``port`` are joined as ``ip:port`` for
     validation. A record that fails is deleted by its ``ip``.
     :return: never returns
     """
     self.db.changeTable(self.useful_proxy_queue)
     while True:
         record = self.db.pop()
         if not record:
             # Nothing popped this round; wait for the next poll.
             sleep(20)
             continue

         host, port = record.get('ip'), record.get('port')
         addr = "%s:%s" % (host, port)
         if not validUsefulProxy(addr):
             self.log.info('ProxyCheck: {} validation fail'.format(addr))
             self.db.delete(record['ip'])
         else:
             self.log.info('ProxyCheck: {} validation pass'.format(addr))
         sleep(20)
 def __validProxy__(self):
     """
     Re-check every proxy in the useful table in an endless loop, deleting failures.
     :return: never returns
     """
     while True:
         self.db.changeTable(self.useful_proxy_queue)
         for proxy in self.db.getAll():
             if not validUsefulProxy(proxy):
                 self.db.delete(proxy)
                 self.log.info('proxy: {} validation fail'.format(proxy))
                 continue
             self.log.debug('proxy: {} validation pass'.format(proxy))
     # Not reached while the loop above has no exit; kept from the original.
     self.log.info(u'代理验证程序运行正常')
Example #15
0
 def __validProxy__(self):
     """
     Validate the useful-proxy table over and over, removing proxies that fail.
     :return: never returns
     """
     while True:
         self.db.changeTable(self.useful_proxy_queue)
         for candidate in self.db.getAll():
             is_alive = validUsefulProxy(candidate)
             if is_alive:
                 message = 'proxy: {} validation pass'.format(candidate)
                 self.log.debug(message)
             else:
                 self.db.delete(candidate)
                 message = 'proxy: {} validation fail'.format(candidate)
                 self.log.info(message)
     # Not reached while the loop above has no exit; kept from the original.
     self.log.info(u'代理验证程序运行正常')
Example #16
0
 def run(self):
     """
     Validate every proxy waiting in ``self.queue``; delete the ones that fail.

     Items are taken with a non-blocking ``get`` so the worker stops as soon
     as the queue is empty. The previous blocking ``get()`` could never raise
     ``Empty`` and could hang if another worker emptied the queue between
     the ``qsize()`` check and the ``get()`` call.
     :return: None
     """
     self.db.changeTable(self.useful_proxy_queue)
     while True:
         try:
             proxy = self.queue.get(block=False)
         except Empty:
             # Queue drained: this worker is done.
             break
         if validUsefulProxy(proxy):
             self.log.info(f'ProxyCheck: {proxy} validation pass')
         else:
             self.log.info(
                 f'ProxyCheck: {proxy} validation fail, delete it from useful_proxy!'
             )
             ProxyManager.delete_proxy(proxy)
         self.queue.task_done()
Example #17
0
    def __validProxy(self):
        """
        Validate every proxy in the useful table in an endless loop and delete the failures.
        :return: never returns
        """
        while True:
            self.db.changeTable(self.useful_proxy_queue)
            for entry in self.db.getAll():
                # Entries may arrive as bytes; normalise to text first.
                proxy = entry.decode('utf-8') if isinstance(entry, bytes) else entry

                if not validUsefulProxy(proxy):
                    self.db.delete(proxy)
                    self.log.info('validProxy_b: {} validation fail'.format(proxy))
                    continue
                self.log.debug('validProxy_b: {} validation pass'.format(proxy))
        # Not reached while the loop above has no exit; kept from the original.
        self.log.info('validProxy_a running normal')
Example #18
0
    def run(self):
        """
        Re-validate the useful-proxy table in rounds separated by a 30-second sleep.

        Each popped item's ``proxy`` is validated. A passing proxy is put
        back and a failing one is deleted. The item's ``value`` is not used
        here, so it is no longer read.
        :return: never returns
        """
        self.db.changeTable(self.useful_proxy_queue)
        while True:
            proxy_item = self.db.pop()
            while proxy_item:
                proxy = proxy_item.get('proxy')
                if validUsefulProxy(proxy):
                    self.log.info(
                        'ProxyCheck: {} validation pass'.format(proxy))
                    # pop() took the entry out, so a working proxy goes back in.
                    self.db.put(proxy)
                else:
                    self.log.info(
                        'ProxyCheck: {} validation fail'.format(proxy))
                    self.db.delete(proxy)

                proxy_item = self.db.pop()
            sleep(30)
Example #19
0
 def valid_proxy(self):
     """
     Pop proxies from raw_proxy_queue and store the ones that validate in useful_proxy_queue.
     :return: None
     """
     self.db.changeTable(self.raw_proxy_queue)
     candidate = self.db.pop()
     self.log.info('%s start valid proxy' % time.ctime())
     while candidate:
         if not validUsefulProxy(candidate):
             self.log.debug('proxy: %s validation fail' % candidate)
         else:
             self.db.changeTable(self.useful_proxy_queue)
             self.db.put(candidate)
             self.log.debug('proxy: %s validation passes' % candidate)
         # Always return to the raw table before the next pop.
         self.db.changeTable(self.raw_proxy_queue)
         candidate = self.db.pop()
     self.log.info('%s valid proxy complete' % time.ctime())
 def validProxy(self):
     """
     Validate the proxies in raw_proxy_queue and put the usable ones into useful_proxy_queue.

     The set of already-stored proxies is read from the useful table once,
     before the loop. A popped raw proxy is stored only when it is not in
     that snapshot and passes ``validUsefulProxy``. Anything else is logged
     as a validation failure.
     :return: None
     """
     # Snapshot the useful table so the duplicate check compares against
     # proxies that are already stored. The snapshot used to be taken while
     # the raw table was selected.
     self.db.changeTable(self.useful_proxy_queue)
     exist_proxy = self.db.getAll()

     self.db.changeTable(self.raw_proxy_queue)
     raw_proxy = self.db.pop()
     self.log.info('%s start validProxy_a' % time.ctime())
     while raw_proxy:
         # Cheap membership test first; validation runs only for new proxies.
         if (raw_proxy not in exist_proxy) and validUsefulProxy(raw_proxy):
             self.db.changeTable(self.useful_proxy_queue)
             self.db.put(raw_proxy)
             self.log.info('validProxy_a: %s validation pass' % raw_proxy)
         else:
             self.log.debug('validProxy_a: %s validation fail' % raw_proxy)
         # Back to the raw table for the next pop.
         self.db.changeTable(self.raw_proxy_queue)
         raw_proxy = self.db.pop()
     self.log.info('%s validProxy_a complete' % time.ctime())
Example #21
0
 def run(self):
     """
     Check every proxy waiting in ``self.queue`` and update its failure count.

     ``self.item_dict[proxy]`` holds the failure count. A pass stores
     count - 1 when the count is positive. A failure deletes the proxy when
     count + 1 reaches FAIL_COUNT, otherwise stores count + 1.
     :return: None
     """
     self.db.changeTable(self.useful_proxy_queue)
     while self.queue.qsize():
         proxy = self.queue.get()
         failures = self.item_dict[proxy]
         passed = validUsefulProxy(proxy)
         if passed:
             # Validation passed: lower a positive counter by one.
             if failures and int(failures) > 0:
                 self.db.put(proxy, num=int(failures) - 1)
             self.log.info('ProxyCheck: {} validation pass'.format(proxy))
         else:
             self.log.info('ProxyCheck: {} validation fail'.format(proxy))
             too_many = failures and int(failures) + 1 >= FAIL_COUNT
             if too_many:
                 self.log.info('ProxyCheck: {} fail too many, delete!'.format(proxy))
                 self.db.delete(proxy)
             else:
                 self.db.put(proxy, num=int(failures) + 1)
         self.queue.task_done()
Example #22
0
    def validProxy(self):
        """
        Validate the proxies in raw_proxy_queue and put the usable ones into useful_proxy_queue.

        Each popped record's ``ip`` and ``port`` are joined as ``ip:port``
        for validation. The whole record is stored when validation passes.
        The start message is logged once per record.
        :return: None
        """
        self.db.changeTable(self.raw_proxy_queue)
        record = self.db.pop()

        while True:
            if not record:
                break

            self.log.info('ProxyRefreshSchedule: %s start validProxy' % time.ctime())
            ip, port = record.get('ip'), record.get('port')
            addr = "%s:%s" % (ip, port)

            if not validUsefulProxy(addr):
                self.log.info('ProxyRefreshSchedule: %s validation fail' % addr)
            else:
                self.db.changeTable(self.useful_proxy_queue)
                self.db.put(record)
                self.log.info('ProxyRefreshSchedule: %s validation pass' % addr)

            # Switch back to the raw table before fetching the next record.
            self.db.changeTable(self.raw_proxy_queue)
            record = self.db.pop()

        self.log.info('ProxyRefreshSchedule: %s validProxy complete' % time.ctime())
    def validProxy(self):
        """
        Re-validate every proxy in the useful table in an endless loop.

        A pass increments the proxy's counter and a failure decrements it.
        After each check the counter is read back and the proxy is deleted
        when the counter is below -1. When the table is empty the loop sleeps
        100 seconds before the next round. Uses Python 2 print statements for
        debug output.
        :return: never returns (the loop has no exit)
        """
        while True:
            # for num in range(5):
            self.db.changeTable(self.useful_proxy_queue)

            each_proxys = self.db.getAll()
            print "验证所有ip", each_proxys
            # Nothing to validate: wait before polling the table again
            if not each_proxys:
                time.sleep(100)

            for each_proxy in each_proxys:
                if isinstance(each_proxy, bytes):
                    each_proxy = each_proxy.decode('utf-8')
                print "验证ip:", each_proxy
                if validUsefulProxy(each_proxy):
                    # Success: counter + 1
                    self.db.inckey(each_proxy, 1)
                    self.log.debug(
                        'validProxy_b: {} validation pass'.format(each_proxy))

                else:
                    # print "删除:",each_proxy
                    # self.db.delete(each_proxy)

                    # Failure: counter - 1
                    self.db.inckey(each_proxy, -1)
                    # self.db.delete(each_proxy)
                    self.log.info(
                        'validProxy_b: {} validation fail'.format(each_proxy))
                value = self.db.getvalue(each_proxy)
                if value and int(value) < -1:
                    # Delete the proxy once its counter is below -1
                    # (the original comment said "below -5"; the code checks < -1)
                    self.db.delete(each_proxy)
        # NOTE(review): unreachable - the while True loop above never breaks.
        self.log.info('validProxy_a running normal')
Example #24
0
 def run(self):
     """
     Drain ``self.queue`` without blocking and update each proxy's failure count.

     ``self.item_dict[proxy]`` holds the failure count. A pass stores
     count - 1 when the count is positive. A failure deletes the proxy when
     count + 1 reaches FAIL_COUNT, otherwise stores count + 1. The loop ends
     when the queue raises ``Empty``.
     :return: None
     """
     self.db.changeTable(self.useful_proxy_queue)
     while True:
         try:
             proxy = self.queue.get(block=False)
         except Empty:
             # Queue drained: stop this worker.
             break

         failures = self.item_dict[proxy]
         if not validUsefulProxy(proxy):
             self.log.info('ProxyCheck: {} validation fail'.format(proxy))
             if failures and int(failures) + 1 >= FAIL_COUNT:
                 self.log.info('ProxyCheck: {} fail too many, delete!'.format(proxy))
                 self.db.delete(proxy)
             else:
                 self.db.put(proxy, num=int(failures) + 1)
         else:
             # Validation passed: lower a positive counter by one.
             if failures and int(failures) > 0:
                 self.db.put(proxy, num=int(failures) - 1)
             self.log.info('ProxyCheck: {} validation pass'.format(proxy))
         self.queue.task_done()
Example #25
0
 def run(self):
     """
     Re-validate every proxy returned by ``self.db.getAll()`` every five minutes.

     ``getAll()`` is iterated as ``proxy -> count`` pairs. A pass stores
     count - 1 when the count is positive. A failure deletes the proxy when
     the count is above FAIL_COUNT, otherwise stores count + 1.
     :return: never returns
     """
     self.db.changeTable(self.useful_proxy_queue)
     while True:
         snapshot = self.db.getAll()
         for proxy, failures in snapshot.items():
             if not validUsefulProxy(proxy):
                 self.log.info(
                     'ProxyCheck: {} validation fail'.format(proxy))
                 if failures and int(failures) > FAIL_COUNT:
                     self.log.info(
                         'ProxyCheck: {} fail too many, delete!'.format(
                             proxy))
                     self.db.delete(proxy)
                 else:
                     self.db.put(proxy, num=int(failures) + 1)
                 continue

             # Validation passed: lower a positive counter by one.
             if failures and int(failures) > 0:
                 self.db.put(proxy, num=int(failures) - 1)
             self.log.info(
                 'ProxyCheck: {} validation pass'.format(proxy))
         sleep(300)
Example #26
0
    def validateProxy(self, proxy):
        """
        Check a single proxy by delegating to ``validUsefulProxy``.
        :param proxy: the proxy to check, passed through unchanged
        :return: whatever ``validUsefulProxy`` returns for ``proxy``
        """
        is_usable = validUsefulProxy(proxy)
        return is_usable