Esempio n. 1
0
def testProxyClass():
    """Smoke-test ``Proxy`` by round-tripping one instance through JSON.

    Prints, in order: the ``to_json`` form of a fresh proxy, the JSON dump of
    its ``to_dict`` form after ``source`` has been set, and the ``to_dict``
    form of the proxy rebuilt from that dump by ``Proxy.createFromJson``.
    """
    sample = Proxy("127.0.0.1:8080")
    print(sample.to_json)

    sample.source = "test"
    serialized = json.dumps(sample.to_dict, ensure_ascii=False)
    print(serialized)

    restored = Proxy.createFromJson(serialized)
    print(restored.to_dict)
Esempio n. 2
0
def testExist():
    """Re-check every stored proxy tagged ``b47w`` with one raw ``Checker``.

    Reads the matching URLs from the ``proxy`` table, prints them, queues one
    tagged ``Proxy`` per URL, then starts a single ``Checker`` and waits for
    it to finish.
    """
    rows = SqlClient().engine.execute(
        'select url from proxy where tag="b47w"').fetchall()
    urls = [row[0] for row in rows]
    print(urls)

    pending = Queue()
    for url in urls:
        candidate = Proxy(url)
        candidate.tag = 'b47w'
        pending.put(candidate)

    worker = Checker('raw', pending, "thread_01")
    worker.start()
    worker.join()
Esempio n. 3
0
 def freeProxy20():
     """Yield ``Proxy`` objects scraped from the premproxy.com ip-port lists.

     Pages 1 to 3 are requested through ``MAINPROXY``. The generator stops at
     the first page whose parsed tree is ``None``. Entries that fail to parse
     are printed and skipped. Every proxy is yielded with the ``https`` scheme.
     """
     source = 'premproxy.com'
     pages = [
         f'https://premproxy.com/list/ip-port/{number}.htm'
         for number in range(1, 4)
     ]
     via_main = {'http': MAINPROXY, 'https': MAINPROXY}
     for page in pages:
         tree = WebRequest().get(page, proxies=via_main).tree
         if tree is None:
             return
         for item in tree.xpath('//ul[@id="ipportlist"]/li'):
             try:
                 # The last character of the text node is dropped
                 # (presumably the ip/port separator -- TODO confirm).
                 ip = item.xpath('./li/text()')[0][:-1]
                 port = item.xpath('./li/span/text()')[0]
                 yield Proxy(f'https://{ip}:{port}', source=source)
             except Exception as err:
                 print(type(err), err)
Esempio n. 4
0
 def getAll(self):
     """
     Return every proxy in the pool as a list of ``Proxy`` objects.

     :return: one ``Proxy`` per value of the mapping returned by ``self.db.getAll()``
     """
     stored = self.db.getAll()
     return [Proxy.createFromJson(raw) for raw in stored.values()]
def testRedisClient():
    """Exercise the Redis-backed ``DbClient`` on the ``use_proxy`` table.

    Prints the results of put, get, exists (for two addresses), pop, getAll
    and getCount, in that order.
    """
    from db.dbClient import DbClient
    from helper.proxy import Proxy

    client = DbClient("redis://:[email protected]:6379")
    client.changeTable("use_proxy")

    record = (
        '{"proxy": "27.38.96.101:9797", "fail_count": 0, "region": "", "type": "",'
        ' "source": "freeProxy03", "check_count": 0, "last_status": "", "last_time": ""}'
    )
    sample = Proxy.createFromJson(record)

    print("put: ", client.put(sample))
    print("get: ", client.get())
    for address in ("27.38.96.101:9797", "27.38.96.101:8888"):
        print("exists: ", client.exists(address))
    print("pop: ", client.pop())
    print("getAll: ", client.getAll())
    print("getCount", client.getCount())
Esempio n. 6
0
 def freeProxy16():
     """Yield ``Proxy`` objects scraped from the free-proxy.cz socks5 listings.

     Five listing pages are requested through ``MAINPROXY``; a page that does
     not answer with HTTP 200 is skipped. For each table row the IP is decoded
     from a base64 string embedded in an inline script, the port is read from
     the second cell and the protocol from the third. Rows that fail to parse
     are printed and skipped.
     """
     proxies = {'http': MAINPROXY, 'https': MAINPROXY}
     source = 'free-proxy.cz'
     urls = [
         'http://free-proxy.cz/en/proxylist/country/all/socks5/ping/all',
         'http://free-proxy.cz/en/proxylist/country/all/socks5/ping/all/2',
         'http://free-proxy.cz/en/proxylist/country/all/socks5/ping/all/3',
         'http://free-proxy.cz/en/proxylist/country/all/socks5/ping/all/4',
         'http://free-proxy.cz/en/proxylist/country/all/socks5/ping/all/5',
     ]
     # Raw string: a backslash-w in a plain literal is an invalid escape
     # sequence (SyntaxWarning on Python 3.12+). Compiled once, not per row.
     quoted_base64 = re.compile(r'(?:")([\w=]+)(?:")')
     for url in urls:
         r = WebRequest().get(url, proxies=proxies)
         if r.response.status_code != 200:
             continue
         # [1:] skips the first table row (presumably the header).
         for tr in r.tree.xpath('//table[@id="proxy_list"]//tr')[1:]:
             try:
                 ip_script = tr.xpath('./td[1]/script/text()')[0]
                 ip_base64 = quoted_base64.search(ip_script).groups()[0]
                 ip = base64.b64decode(ip_base64).decode('utf8')
                 port = tr.xpath('./td[2]/span/text()')[0]
                 protocol = ''.join(tr.xpath('./td[3]/small/text()'))
                 yield Proxy(f'{protocol}://{ip}:{port}', source=source)
             except Exception as e:
                 print(e)
 def getAll(self, https=False):
     """
     Return the pool's proxies as a list of ``Proxy`` objects.

     :param https: forwarded unchanged to ``self.db.getAll``
     :return: one ``Proxy`` per item returned by ``self.db.getAll(https)``
     """
     result = []
     for raw in self.db.getAll(https):
         result.append(Proxy.createFromJson(raw))
     return result
Esempio n. 8
0
def _runProxyFetch():
    """Fetch proxies, queue each one's ``to_json`` form, then run the raw checker."""
    pending = Queue()
    for fetched in runFetcher():
        serialized = Proxy(fetched).to_json
        pending.put(serialized)
    runChecker("raw", pending)
Esempio n. 9
0
 def update_fail_count(self, proxy_str):
     """Increment the stored fail count of ``proxy_str`` and save it back.

     :param proxy_str: key passed to ``self.db.get``
     :return: the updated ``Proxy``, or None when ``self.db.get`` returns
         nothing truthy for ``proxy_str``
     """
     stored = self.db.get(proxy_str)
     if stored:
         record = Proxy.createFromJson(stored)
         record.fail_count += 1
         self.db.update(record)
         return record
     return None
Esempio n. 10
0
 def get(self, https=False):
     """
     Return one proxy from the database layer.
     Args:
         https: True/False, forwarded unchanged to ``self.db.get``
     Returns:
         a ``Proxy`` built from the stored value, or None when the
         database layer returns nothing truthy
     """
     raw = self.db.get(https)
     if not raw:
         return None
     return Proxy.createFromJson(raw)
Esempio n. 11
0
def _runProxyFetch():
    """Fetch and raw-check new proxies, but only while the pool is low."""
    pending = Queue()
    handler = ProxyHandler()
    # Fetch only when the pool holds fewer than poolSizeMin proxies.
    pool_is_low = handler.db.getCount() < handler.conf.poolSizeMin
    if not pool_is_low:
        return
    for fetched in runFetcher():
        pending.put(Proxy(fetched).to_json)
    runChecker("raw", pending)
Esempio n. 12
0
 def getByTag(self, tag):
     """
     Return a proxy looked up by tag.

     :param tag: forwarded to ``self.db.getByTag``
     :return: ``Proxy`` built from the stored value, or None when the
         database layer returns nothing truthy
     """
     raw = self.db.getByTag(tag)
     return Proxy.createFromJson(raw) if raw else None
Esempio n. 13
0
 def get(self):
     """
     Return one proxy from the database layer.

     :return: ``Proxy`` built from the stored value, or None when
         ``self.db.get()`` returns nothing truthy
     """
     raw = self.db.get()
     if not raw:
         return None
     return Proxy.createFromJson(raw)
Esempio n. 14
0
 def pop(self):
     """
     Pop one proxy from the database layer and return it.

     :return: ``Proxy`` built from the value returned by ``self.db.pop()``,
         or None when that value is not truthy
     """
     raw = self.db.pop()
     return Proxy.createFromJson(raw) if raw else None
Esempio n. 15
0
def testMysqlClient():
    """Manual check of ``SqlClient``: print the result of ``get('default')``.

    A sample ``Proxy`` is also built; it is only used by the disabled insert
    step below.
    """
    client = SqlClient()
    sample = Proxy('socks://127.0.0.1:1000', tag='test')

    # Insert step (disabled):
    # client.put(sample)

    # Fetch one entry and show it.
    print(client.get('default'))
Esempio n. 16
0
 def getAll(self, tag=None):
     """
     Return the rows of the ``proxy`` table as a list of ``Proxy`` objects.

     :param tag: when not None, only rows whose ``tag`` column equals it are read
     :return: list of ``Proxy``, each built from one row's JSON form
     """
     # NOTE(review): ``tag`` is interpolated into the SQL text unescaped; do
     # not pass untrusted values here until the query is parameterized.
     where = '' if tag is None else f' where tag="{tag}"'
     frame = pd.read_sql(
         'select url, score, proxy_type, tag from proxy' + where, self.engine)
     return [Proxy.createFromJson(row.to_json()) for _, row in frame.iterrows()]
Esempio n. 17
0
 def run(self):
     """Run ``self.fetcher`` and merge what it yields into ``self.proxy_dict``.

     Each fetched string is logged, stripped, and used as the dict key. A key
     already present gets ``self.fetch_source`` added via ``add_source``;
     a new key gets a fresh ``Proxy``. Any exception raised while fetching is
     logged as an error and ends the run without propagating.
     """
     self.log.info("ProxyFetch - {func}: start".format(func=self.fetch_source))
     try:
         for fetched in self.fetcher():
             self.log.info('ProxyFetch - %s: %s ok' % (self.fetch_source, fetched.ljust(23)))
             key = fetched.strip()
             if key not in self.proxy_dict:
                 self.proxy_dict[key] = Proxy(key, source=self.fetch_source)
             else:
                 self.proxy_dict[key].add_source(self.fetch_source)
     except Exception as err:
         self.log.error("ProxyFetch - {func}: error".format(func=self.fetch_source))
         self.log.error(str(err))
Esempio n. 18
0
def testRedisClient():
    """Manual check of tag lookup on the Redis-backed ``DbClient``.

    Builds a sample ``Proxy`` (only used by the disabled steps below) and
    prints the result of ``getByTag(tag='test')`` on the ``use_proxy`` table.
    """
    from db.dbClient import DbClient
    from helper.proxy import Proxy

    client = DbClient("redis://:[email protected]:6379")
    client.changeTable("use_proxy")

    record = (
        '{"proxy": "27.38.96.101:9797", "fail_count": 0, "region": "", "type": "",'
        ' "source": "freeProxy03", "check_count": 0, "last_status": "", "last_time": ""}'
    )
    sample = Proxy.createFromJson(record)

    # Disabled manual steps, kept for reference:
    # print("put: ", client.put(sample))
    # print("put: ", client.putTag(tag='test', proxy='1238:7'))
    # print("del: ", client.deleteTag(tag='test', proxy='1234'))
    print("get: ", client.getByTag(tag='test'))
Esempio n. 19
0
def testProxyClass1():
    """Smoke-test ``Proxy`` with score, type and tag set, via a JSON round trip.

    Prints, in order: the ``to_json`` form of a fresh proxy, the JSON dump of
    its ``to_dict`` form after the three attributes are assigned, and the
    ``to_dict`` form of the proxy rebuilt by ``Proxy.createFromJson``.
    """
    sample = Proxy("https://127.0.0.1:8080")
    print(sample.to_json)

    sample.score = 10
    sample.proxy_type = "test"
    sample.tag = "高匿"

    serialized = json.dumps(sample.to_dict, ensure_ascii=False)
    print(serialized)

    restored = Proxy.createFromJson(serialized)
    print(restored.to_dict)
Esempio n. 20
0
    def run(self):
        """Drain ``self.queue``, check each proxy and update the pool.

        Each queued JSON item is turned into a ``Proxy`` and passed through
        ``proxyCheck``. What happens next depends on ``self.type``:

        * ``"raw"``: a proxy that passed is stored unless the handler already
          has it; a proxy that failed is only logged.
        * anything else: a proxy that passed is updated; a proxy that failed
          is deleted once its ``fail_count`` exceeds
          ``self.conf.maxFailCount``, otherwise it is updated.

        The method returns when the queue is empty.
        """
        self.log.info("ProxyCheck - {}  : start".format(self.name))
        while True:
            try:
                proxy_json = self.queue.get(block=False)
            except Empty:
                self.log.info("ProxyCheck - {}  : complete".format(self.name))
                return

            proxy = proxyCheck(Proxy.createFromJson(proxy_json))
            raw_mode = self.type == "raw"
            passed = proxy.last_status

            if raw_mode:
                if not passed:
                    self.log.info('ProxyCheck - {}  : {} fail'.format(
                        self.name, proxy.proxy.ljust(23)))
                elif self.proxy_handler.exists(proxy):
                    self.log.info('ProxyCheck - {}  : {} exists'.format(
                        self.name, proxy.proxy.ljust(23)))
                else:
                    self.log.info('ProxyCheck - {}  : {} success'.format(
                        self.name, proxy.proxy.ljust(23)))
                    self.proxy_handler.put(proxy)
            elif passed:
                self.log.info('ProxyCheck - {}  : {} pass'.format(
                    self.name, proxy.proxy.ljust(23)))
                self.proxy_handler.update(proxy)
            elif proxy.fail_count > self.conf.maxFailCount:
                self.log.info(
                    'ProxyCheck - {}  : {} fail, count {} delete'.format(
                        self.name, proxy.proxy.ljust(23), proxy.fail_count))
                self.proxy_handler.delete(proxy)
            else:
                self.log.info(
                    'ProxyCheck - {}  : {} fail, count {} keep'.format(
                        self.name, proxy.proxy.ljust(23), proxy.fail_count))
                self.proxy_handler.update(proxy)
            self.queue.task_done()
Esempio n. 21
0
 def freeProxy17():
     """Yield ``Proxy`` objects scraped from the proxynova.com elite list.

     The page is requested through ``MAINPROXY``. The generator stops if the
     parsed tree is ``None``. For each table row the IP is the single-quoted
     text inside an inline script and the port is the stripped text of the
     second cell. Rows that fail to parse are printed and skipped. Every
     proxy is yielded with the ``https`` scheme.
     """
     source = 'www.proxynova.com'
     pages = ['https://www.proxynova.com/proxy-server-list/elite-proxies/']
     via_main = {'http': MAINPROXY, 'https': MAINPROXY}
     for page in pages:
         tree = WebRequest().get(page, proxies=via_main).tree
         if tree is None:
             return
         for row in tree.xpath('//*[@id="tbl_proxy_list"]/tbody/tr'):
             try:
                 script_text = row.xpath('./td[1]/abbr/script/text()')[0]
                 ip = re.search('(?:\')(.+)(?:\')', script_text).groups()[0]
                 port = row.xpath('./td[2]/text()')[0].strip()
                 yield Proxy(f'https://{ip}:{port}', source=source)
             except Exception as err:
                 print(err)
Esempio n. 22
0
    def run(self):
        """
        Collect proxies from every configured fetcher and yield the valid ones.

        Each name in ``self.conf.fetchers`` is looked up on ``ProxyFetcher``;
        missing or non-callable entries are logged and skipped. Fetched
        strings are stripped and de-duplicated, with every extra source
        recorded through ``add_source``. An exception inside one fetcher is
        logged and does not stop the others.
        :return: generator of ``Proxy`` objects whose ``proxy`` value passes
            ``DoValidator.preValidator``
        """
        collected = {}
        self.log.info("ProxyFetch : start")
        for fetch_source in self.conf.fetchers:
            self.log.info("ProxyFetch - {func}: start".format(func=fetch_source))
            fetcher = getattr(ProxyFetcher, fetch_source, None)
            if not fetcher:
                self.log.error(
                    "ProxyFetch - {func}: class method not exists!".format(func=fetch_source))
                continue
            if not callable(fetcher):
                self.log.error(
                    "ProxyFetch - {func}: must be class method".format(func=fetch_source))
                continue

            try:
                for fetched in fetcher():
                    self.log.info('ProxyFetch - %s: %s ok' % (fetch_source, fetched.ljust(23)))
                    key = fetched.strip()
                    if key not in collected:
                        collected[key] = Proxy(key, source=fetch_source)
                    else:
                        collected[key].add_source(fetch_source)
            except Exception as err:
                self.log.error("ProxyFetch - {func}: error".format(func=fetch_source))
                self.log.error(str(err))
        self.log.info("ProxyFetch - all complete!")
        for candidate in collected.values():
            if DoValidator.preValidator(candidate.proxy):
                yield candidate
Esempio n. 23
0
 def freeProxy21():
     """Yield ``Proxy`` objects scraped from the proxyranker.com China lists.

     Four list pages are requested through ``MAINPROXY``. The generator stops
     at the first page whose parsed tree is ``None``. The first and last
     table rows of each page are ignored. Rows that fail to parse are printed
     and skipped. Every proxy is yielded with the ``https`` scheme.
     """
     source = 'www.proxyranker.com'
     pages = [
         'https://www.proxyranker.com/china/list/',
         'https://www.proxyranker.com/china/list-2/',
         'https://www.proxyranker.com/china/list-3/',
         'https://www.proxyranker.com/china/list-4/',
     ]
     via_main = {'http': MAINPROXY, 'https': MAINPROXY}
     for page in pages:
         tree = WebRequest().get(page, proxies=via_main).tree
         if tree is None:
             return
         rows = tree.xpath('//div[@class="bl"]//tr')
         # Drop the first and the last row in a single slice.
         for row in rows[1:-1]:
             try:
                 ip = row.xpath('./td[1]/text()')[0]
                 port = row.xpath('./td[4]/span/text()')[0]
                 yield Proxy(f'https://{ip}:{port}', source=source)
             except Exception as err:
                 print(type(err), err)
def testRedisClient():
    """Exercise the Redis-backed ``DbClient`` with ``https=None`` lookups.

    Prints the results of put, get, exists (for two addresses), pop, getAll
    and getCount, in that order, against the ``use_proxy`` table.
    """
    from db.dbClient import DbClient
    from helper.proxy import Proxy

    client = DbClient("redis://:[email protected]:6379")
    client.changeTable("use_proxy")

    record = (
        '{"proxy": "118.190.79.36:8090", "https": false, "fail_count": 0,'
        ' "region": "", "anonymous": "", "source": "freeProxy14",'
        ' "check_count": 4, "last_status": true,'
        ' "last_time": "2021-05-26 10:58:04"}'
    )
    sample = Proxy.createFromJson(record)

    print("put: ", client.put(sample))
    print("get: ", client.get(https=None))
    for address in ("27.38.96.101:9797", "27.38.96.101:8888"):
        print("exists: ", client.exists(address))
    print("pop: ", client.pop(https=None))
    print("getAll: ", client.getAll(https=None))
    print("getCount", client.getCount())
Esempio n. 25
0
 def freeProxy19():
     """Yield ``Proxy`` objects scraped from the freeproxylists.net HTTPS list.

     The page is requested through ``MAINPROXY``. The generator stops if the
     parsed tree is ``None``. The first four ``<tr>`` rows are skipped. In
     each remaining row the IP is recovered from a URL-quoted string embedded
     in an inline script, the port is read from the second cell and the
     protocol from the third. Rows that fail to parse are printed and skipped.
     """
     source = 'www.freeproxylists.net'
     urls = [
         'http://www.freeproxylists.net/zh/?c=&pt=&pr=HTTPS&a%5B%5D=0&a%5B%5D=1&a%5B%5D=2&u=50',
     ]
     proxies = {'http': MAINPROXY, 'https': MAINPROXY}
     # Compiled once instead of per row. The second pattern is a raw string
     # because a backslash-dot in a plain literal is an invalid escape
     # sequence (SyntaxWarning on Python 3.12+).
     quoted_payload = re.compile('(?:")(.*)(?:")')
     tagged_ip = re.compile(r'(?:>)([0-9\.]+)(?:<)')
     for url in urls:
         tree = WebRequest().get(url, proxies=proxies).tree
         if tree is None:
             return None
         ret = tree.xpath('//tr')[4:]
         for r in ret:
             try:
                 ip_script = r.xpath('./td[1]/script/text()')[0]
                 ip_mask = quoted_payload.search(ip_script).groups()[0]
                 # The payload is percent-encoded markup; the IP sits between
                 # a '>' and the following '<'.
                 ip = tagged_ip.search(unquote(ip_mask, 'utf8')).groups()[0]
                 port = r.xpath('./td[2]/text()')[0]
                 protocol = r.xpath('./td[3]/text()')[0]
                 yield Proxy(f'{protocol}://{ip}:{port}', source=source)
             except Exception as e:
                 print(type(e), e)
Esempio n. 26
0
 def freeProxy18():
     """Yield ``Proxy`` objects scraped from spys.one using headless Chrome.

     A headless Chrome WebDriver, routed through ``MAINPROXY``, loads the
     list page. The first three matched table rows are skipped. For each
     remaining row the address is the text of the ``<font>`` element in the
     first cell and the protocol is the first word of the second cell. Rows
     that fail to parse are printed and skipped.

     The WebDriver session is always shut down, including when the consumer
     stops iterating early or an unexpected error escapes the loop.
     """
     source = 'spys.one'
     urls = [
         'https://spys.one/en/free-proxy-list/',
     ]
     chrome_options = webdriver.ChromeOptions()
     chrome_options.add_argument('--headless')
     chrome_options.add_argument('--disable-gpu')
     chrome_options.add_argument('--proxy-server=' + MAINPROXY)
     client = webdriver.Chrome(options=chrome_options)
     try:
         for url in urls:
             client.get(url)
             # NOTE(review): the find_element(s)_by_* helpers are deprecated
             # in Selenium 4; migrate when the project upgrades.
             trs = client.find_elements_by_xpath(
                 '//table/tbody/tr/td/table/tbody/tr')
             for r in trs[3:]:
                 try:
                     tds = r.find_elements_by_tag_name('td')
                     # No separate port is read, so this text presumably
                     # already has the form "ip:port" -- TODO confirm.
                     ip = tds[0].find_element_by_xpath('./font').text
                     protocol = tds[1].text.split(' ')[0]
                     yield Proxy(f'{protocol}://{ip}', source=source)
                 except Exception as e:
                     print(e)
     finally:
         # quit() ends the whole WebDriver session (browser and driver
         # process); close() only closes the current window. Running it in
         # ``finally`` also covers a generator that is abandoned early.
         client.quit()
Esempio n. 27
0
def delete():
    """Delete the proxy named by the ``proxy`` query argument.

    :return: dict with ``code`` 0 and the result of ``proxy_handler.delete``
        under ``src``
    """
    target = Proxy(request.args.get('proxy'))
    return {"code": 0, "src": proxy_handler.delete(target)}
Esempio n. 28
0
def _runProxyFetch():
    """Fetch proxies and put each one straight into the handler's database."""
    handler = ProxyHandler()
    for fetched in runFetcher():
        candidate = Proxy(fetched)
        handler.db.put(candidate)