Esempio n. 1
0
    def run(self):
        """
        Drain ``self.queue`` without blocking and validate every raw proxy.

        Each queued item is parsed with ``Proxy.newProxyFromJson`` and checked
        with ``checkProxyUseful(proxy, self.origin_ips)``. A proxy that passes
        is stored through ``self.db.put`` unless ``self.db.exists`` already
        reports it. The outcome (exists / pass / fail) is logged for every
        item, and ``task_done`` is called once per processed item.

        The method returns the first time the queue is found empty.
        :return: None
        """
        self.log.info("RawProxyCheck - {}  : start".format(self.name))
        self.db.changeTable(self.useful_proxy_queue)

        while True:
            try:
                raw_item = self.queue.get(block=False)
            except Empty:
                # Nothing left to validate: leave the loop and log the exit.
                break

            candidate = Proxy.newProxyFromJson(raw_item)
            proxy_obj, status = checkProxyUseful(candidate, self.origin_ips)

            # Pick the message for this outcome; only a new, valid proxy
            # is written to the db.
            if not status:
                template = 'RawProxyCheck - {}  : {} validation fail'
            elif self.db.exists(proxy_obj.proxy):
                template = 'RawProxyCheck - {}  : {} validation exists'
            else:
                self.db.put(proxy_obj)
                template = 'RawProxyCheck - {}  : {} validation pass'

            self.log.info(
                template.format(self.name, proxy_obj.proxy.ljust(20)))
            self.queue.task_done()

        self.log.info("RawProxyCheck - {}  : exit".format(self.name))
Esempio n. 2
0
    def run(self):
        """
        Drain ``self.queue`` without blocking and re-validate stored proxies.

        Each queued item is parsed with ``Proxy.newProxyFromJson`` and checked
        with ``checkProxyUseful``. A proxy is kept (written back through
        ``self.db.put``) when the check succeeds or while its ``fail_count``
        is still below ``FAIL_COUNT``; otherwise it is removed with
        ``self.db.delete``. ``task_done`` is called once per processed item.

        The method returns the first time the queue is found empty.
        :return: None
        """
        self.log.info("UsefulProxyCheck - {}  : start".format(self.name))
        self.db.changeTable(self.useful_proxy_queue)

        while True:
            try:
                raw_item = self.queue.get(block=False)
            except Empty:
                # Nothing left to validate: leave the loop and log the exit.
                break

            parsed = Proxy.newProxyFromJson(raw_item)
            proxy_obj, status = checkProxyUseful(parsed)

            keep = status or proxy_obj.fail_count < FAIL_COUNT
            if not keep:
                self.log.info(
                    'UsefulProxyCheck - {}  : {} validation fail'.format(
                        self.name, proxy_obj.proxy.ljust(20)))
                self.db.delete(proxy_obj.proxy)
            else:
                if self.db.exists(proxy_obj.proxy):
                    self.log.info(
                        'UsefulProxyCheck - {}  : {} validation exists'.format(
                            self.name, proxy_obj.proxy.ljust(20)))
                # The record is written back whether or not it already
                # existed, so the stored entry reflects this check.
                self.db.put(proxy_obj)
                self.log.info(
                    'UsefulProxyCheck - {}  : {} validation pass'.format(
                        self.name, proxy_obj.proxy.ljust(20)))

            self.queue.task_done()

        self.log.info("UsefulProxyCheck - {}  : exit".format(
            self.name))
Esempio n. 3
0
 def getAll(self):
     """
     Read every entry of the table named by ``self.useful_proxy_queue``
     and parse each one with ``Proxy.newProxyFromJson``.
     :return: list of parsed proxies, in the order the db returned them
     """
     self.db.changeTable(self.useful_proxy_queue)
     raw_entries = self.db.getAll()
     return list(map(Proxy.newProxyFromJson, raw_entries))
Esempio n. 4
0
 def get(self):
     """
     Pick one entry at random from the table named by
     ``self.useful_proxy_queue``.
     :return: the chosen entry parsed with ``Proxy.newProxyFromJson``,
              or None when the table yields no entries
     """
     self.db.changeTable(self.useful_proxy_queue)
     candidates = self.db.getAll()
     if not candidates:
         return None
     return Proxy.newProxyFromJson(random.choice(candidates))
Esempio n. 5
0
def testProxyClass():
    """
    Smoke-test the Proxy class by printing a serialisation round trip.

    Prints the fresh proxy's ``info_dict``, then its JSON dump after setting
    ``source``, then the ``info_dict`` of the proxy rebuilt from that JSON.
    """
    sample = Proxy("127.0.0.1:8080")
    print(sample.info_dict)

    sample.source = "test"
    serialized = json.dumps(sample.info_dict, ensure_ascii=False)
    print(serialized)

    restored = Proxy.newProxyFromJson(serialized)
    print(restored.info_dict)
0
    def get_http(self):
        """
        return a random http proxy

        Entries are read from the table named by ``self.useful_proxy_queue``.
        An entry counts as http when the text before ``://`` in its JSON
        ``proxy`` field is exactly ``http``.

        The http entries are filtered first and one is then drawn at random,
        so a proxy is always returned when at least one http entry is stored.
        (Repeatedly drawing random entries from the whole list could miss
        every http entry by chance and return None.)
        :return: the chosen entry parsed with ``Proxy.newProxyFromJson``,
                 or None when there is no http entry
        """
        self.db.changeTable(self.useful_proxy_queue)
        item_list = self.db.getAll()

        http_items = [
            item for item in (item_list or ())
            if json.loads(item)['proxy'].split("://")[0] == 'http'
        ]
        if not http_items:
            return None

        return Proxy.newProxyFromJson(random.choice(http_items))
Esempio n. 7
0
    def getAllByName(self, name):
        """
        Return the proxies from ``self.getAll()`` that have no recent failure
        recorded for ``name``.

        Failure records are read from the table
        ``self.useful_proxy_queue + '_fail_' + name``. A proxy is dropped when
        the first failure record with the same ``proxy`` value has a
        ``last_time`` (format ``%Y-%m-%d %H:%M:%S``) less than 24 hours old.

        :param name: suffix identifying the per-name failure table
        :return: list of proxies without a failure in the last 24 hours
        """
        all_proxies = self.getAll()

        self.db.changeTable(self.useful_proxy_queue + '_fail_' + name)
        fail_list = self.db.getAll()

        # Index failures by proxy address for O(1) lookup. setdefault keeps
        # the first record per address, matching a first-match linear scan.
        failed_by_address = {}
        for item in fail_list:
            failed = Proxy.newProxyFromJson(item)
            failed_by_address.setdefault(failed.proxy, failed)

        # A failure is "recent" when it happened after this instant.
        cutoff = datetime.now() - timedelta(hours=24)

        filter_proxies = []
        for proxy in all_proxies:
            failed = failed_by_address.get(proxy.proxy)
            if failed is not None:
                failed_date = datetime.strptime(failed.last_time,
                                                "%Y-%m-%d %H:%M:%S")
                if failed_date > cutoff:
                    continue
            filter_proxies.append(proxy)

        return filter_proxies
Esempio n. 8
0
 def get_all_proxy(self):
     """
     Read every entry of the table named by ``self.asdl_proxy_queue``
     and parse each one with ``Proxy.newProxyFromJson``.
     :return: list of parsed proxies, in the order the db returned them
     """
     self.db.changeTable(self.asdl_proxy_queue)
     raw_entries = self.db.getAll()
     proxies = []
     for raw_entry in raw_entries:
         proxies.append(Proxy.newProxyFromJson(raw_entry))
     return proxies