Beispiel #1
0
class BaseTest(unittest.TestCase):
    def __init__(self, *pargs, **kwargs):
        unittest.TestCase.__init__(self, *pargs, **kwargs)

    def setUp(self):
        self.pl = GetProxy()

    def test_T1(self):
        """Check if database is empty"""
        self.assertTrue(check_if_database_is_empty(), "Empty database!")

    def test_T2(self):
        """Get one proxy"""
        buff = self.pl.get_proxy()
        self.assertTrue(buff.startswith("http"))

    def test_T3(self):
        """Get many proxy"""
        self.assertTrue(get_many_proxy(15, self.pl))

    def test_T4(self):
        """Testing if database contains only unique proxy addresses"""
        self.assertTrue(check_unique_addresses(),
                        "Not all addresses are unique.")

    def test_https(self):
        proxy = self.pl.get_https_proxy()
        self.assertTrue(check_https(proxy), "Not https")

    def test_proxy(self):
        """Generate proxy and test output"""
        self.shortDescription()
        proxy = self.pl.get_proxy()
        self.assertTrue(check_proxy(proxy), "%s doesn't match" % proxy)
Beispiel #2
0
class BaseTest(unittest.TestCase):
    def __init__(self, *pargs, **kwargs):
        unittest.TestCase.__init__(self, *pargs, **kwargs)

    def setUp(self):
        self.pl = GetProxy()

    def test_T1(self):
        """Check if database is empty"""
        self.assertTrue(check_if_database_is_empty(), "Empty database!")

    def test_T2(self):
        """Get one proxy"""
        buff = self.pl.get_proxy()
        self.assertTrue(buff.startswith("http"))

    def test_T3(self):
        """Get many proxy"""
        self.assertTrue(get_many_proxy(15, self.pl))

    def test_T4(self):
        """Testing if database contains only unique proxy addresses"""
        self.assertTrue(check_unique_addresses(), "Not all addresses are unique.")

    def test_https(self):
        proxy = self.pl.get_https_proxy()
        self.assertTrue(check_https(proxy), "Not https")

    def test_proxy(self):
        """Generate proxy and test output"""
        self.shortDescription()
        proxy = self.pl.get_proxy()
        self.assertTrue(check_proxy(proxy), "%s doesn't match" % proxy)
def main():
    """Re-validate proxies taken from the drop database, forever.

    Each proxy obtained through ``GetProxy(ProxyItemsDropDB)`` is passed to
    ``valid_proxy``. A truthy result is upserted into ``ProxyItemsDB``;
    otherwise the item is upserted into ``ProxyItemsDropForeverDB``. In both
    cases the item is removed from ``ProxyItemsDropDB``. The loop has no exit
    condition.
    """
    source = GetProxy(ProxyItemsDropDB)
    while True:
        candidate = source.get_proxy()
        validated = valid_proxy(candidate)
        if not validated:
            # Failed validation: move the item to the permanent drop store.
            ProxyItemsDropDB.remove_proxy_item(candidate)
            ProxyItemsDropForeverDB.upsert_proxy_item(candidate)
            continue
        # Passed validation: store the validated result, then clear the drop entry.
        ProxyItemsDB.upsert_proxy_item(validated)
        ProxyItemsDropDB.remove_proxy_item(candidate)
Beispiel #4
0
class Spider(object):
    def __init__(self, num):
        """Store ``num`` and create the client, proxy source and key source.

        ``num`` is kept on the instance and also forwarded as the second
        argument to ``GetProxy``.
        """
        self.num = num
        self._client = SiteClient()
        self._getProxy = GetProxy(2, num)
        self._getSearchKey = GetSearchKey()

    def _refresh_proxy(self):
        """Fetch a new proxy and rebuild the client so it uses that proxy.

        The proxy host and port are stored on the instance, and
        ``self._client`` is replaced by a ``SiteClient`` created with an
        ``{"http": "http://<ip>:<port>"}`` mapping.
        """
        # The third element (proxy type) is not used: only an http:// URL is built.
        self._proxy_ip, self._proxy_port, _proxy_type = self._getProxy.get_proxy()
        proxies = {"http": "http://%s:%s" % (self._proxy_ip, self._proxy_port)}
        # Hand the argument to logging so formatting is deferred until the
        # record is actually emitted; the rendered message is unchanged.
        logging.info("++++++++proxies: %s++++++++++++", proxies)
        self._client = SiteClient(proxies)

    def run(self):
        """Crawl company details in an endless loop, switching proxy on errors.

        Each iteration takes a ``reg_bus_ent_id`` from the search-key source,
        skips it when ``QyxybaicLevel2DB.get_one`` returns a truthy value, and
        otherwise merges the result of ``self.get_company`` into a dict holding
        the id and upserts it through ``QyxybaicLevel2DB``.

        NOTE(review): this excerpt ends before the handler of the outer ``try``
        and ``get_company`` is not visible here. The ``except X, err`` clauses
        are Python 2 syntax.
        """
        try:
            # cur = QyxybaicDB.get_all()
            # Pick a proxy once before entering the loop.
            self._refresh_proxy()
            # is_need_refresh_proxy = 5
            while True:
                try:
                    # Disabled: counter-based proxy rotation.
                    # if is_need_refresh_proxy > 0:
                    #     is_need_refresh_proxy -= 1
                    # else:
                    #     self._refresh_proxy()
                    #     is_need_refresh_proxy = 3

                    reg_bus_ent_id = self._getSearchKey.get_reg_bus_ent_id()
                    # reg_bus_ent_id = item['reg_bus_ent_id']
                    logging.info("-------------%s--------------" %
                                 reg_bus_ent_id)
                    # Skip ids for which the level-2 store already returns something.
                    if QyxybaicLevel2DB.get_one(reg_bus_ent_id):
                        logging.info(
                            "----------------is have-------------------")
                        continue

                    # Start from the id and merge in whatever get_company returns.
                    company = {"reg_bus_ent_id": reg_bus_ent_id}
                    company_info = self.get_company(reg_bus_ent_id)
                    company.update(company_info)
                    QyxybaicLevel2DB.upsert_company_detail_level_2(company)
                # All three handlers do the same thing: switch to a new proxy and
                # move on to the next iteration. The bound ``err`` is never used.
                except NeedrefreshProxyError, err:
                    self._refresh_proxy()
                    continue
                except ErrorStatusCode, err:
                    self._refresh_proxy()
                    continue
                except HttpClientError, err:
                    self._refresh_proxy()
                    continue
Beispiel #5
0
def start(args):
    """Initialise for ``args.database`` and launch the three worker threads.

    One thread each is created for ``GetProxy.cycle_get``,
    ``ValidateOrigin.cycle_validate`` and ``TestAvailable.cycle_test``; all
    three are started before the function returns. The threads are not joined.
    """
    db_type = args.database
    init(db_type)

    proxy_fetcher = GetProxy(db_type)
    origin_validator = ValidateOrigin(db_type)
    availability_tester = TestAvailable(db_type)

    jobs = (
        # Periodically fetch IPs from the source websites.
        (proxy_fetcher.cycle_get, "thread-get-ip"),
        # Periodically test which fetched proxies are usable.
        (origin_validator.cycle_validate, "thread-validate-ip"),
        # Periodically re-check and drop proxies that stopped working.
        (availability_tester.cycle_test, "Thread-test-ip"),
    )
    workers = [threading.Thread(target=target, name=name)
               for target, name in jobs]

    for worker in workers:
        worker.start()
 def __init__(self):
     """Create the site client, the proxy source and the search-key source.

     NOTE(review): the meaning of the ``GetProxy(3, 0)`` arguments is not
     visible in this excerpt.
     """
     self._client = SiteClient()
     self._getProxy = GetProxy(3, 0)
     self._getSearchKey = GetSearchKey()
Beispiel #7
0
 def __init__(self, num):
     """Keep ``num`` and build the client, proxy source and key source.

     ``num`` is stored on the instance and passed on as the second argument
     of ``GetProxy``.
     """
     self.num = num
     self._client = SiteClient()
     self._getProxy = GetProxy(2, num)
     self._getSearchKey = GetSearchKey()
Beispiel #8
0
 def setUp(self):
     """Bind a new ``GetProxy`` instance to ``self.pl``.

     NOTE(review): presumably a ``unittest.TestCase`` fixture hook; the
     enclosing class is not visible in this excerpt.
     """
     self.pl = GetProxy()
Beispiel #9
0
 def __init__(self):
     """Create the proxy source and the search-key source.

     ``self._client`` starts out as ``None``; nothing in this excerpt shows
     where it is assigned later.
     """
     self._client = None
     self._getProxy = GetProxy(4, 0)
     self._getSearchKey = GetSearchKey()
Beispiel #10
0
def get():
    """Return one entry of ``useful_proxys`` chosen at random.

    ``random.choice`` raises ``IndexError`` when ``useful_proxys`` is empty.
    """
    candidates = list(useful_proxys)
    return random.choice(candidates)


@app.route('/get_all')
def get_all():
    """Return every entry of ``useful_proxys`` as a new list."""
    # NOTE(review): if ``app`` is a Flask application, a plain list is only an
    # accepted view return value on newer Flask releases -- confirm the version.
    snapshot = list(useful_proxys)
    return snapshot


@app.route('/get_num')
def get_num():
    """Return the number of entries in ``useful_proxys``."""
    # NOTE(review): if ``app`` is a Flask application, an int is not an accepted
    # view return type -- confirm whether this should be sent as a string.
    count = len(useful_proxys)
    return count


if __name__ == '__main__':
    # Step 1: crawl the raw, unfiltered proxy candidates.
    logger.info("开始爬取代理")
    proxys = GetProxy()
    proxys.get()
    logger.info("代理爬取完毕,一共爬取到{}条".format(len(origin_proxys)))

    # Step 2: validate the candidates; the usable ones are saved to a text file below.
    logger.info("接下来开始验证")
    proxy_check()
    logger.info("验证结束,有用的代理有{}条".format(len(useful_proxys)))
    logger.info(useful_proxys)

    # Step 3: write one proxy per line, then serve the routes.
    with open("proxys.txt", 'w') as out:
        out.writelines(proxy + '\n' for proxy in useful_proxys)
    # NOTE(review): debug=True is presumably meant for local development only.
    app.run(debug=True)
Beispiel #11
0
 def setUp(self):
     """Assign a newly created ``GetProxy`` to ``self.pl``.

     NOTE(review): looks like a ``unittest`` fixture hook, but the enclosing
     class is not part of this excerpt.
     """
     self.pl = GetProxy()
 def __init__(self, num):
     """Remember ``num`` and create the proxy source and the key source.

     ``num`` is also the second argument given to ``GetProxy``. The client
     starts out as ``None``.
     """
     self.num = num
     self._client = None
     self._getProxy = GetProxy(1, num)
     self._getSearchKey = GetSearchKey()
Beispiel #13
0
        """
            Method should return List[Proxy]; it will be used in GetProxy.get_proxies
        """
        pass

    # Suffix of the save_as_<suffix> method that is used for saving.
    save_as_method = "txt"

    # based on attribute name save_as_{save_as_method}
    def save_as_txt(self, proxy_list: ProxyList) -> None:
        """Save hook for the ``txt`` format; the save call passes in a ``ProxyList``.

        Left as a stub in this example: it does nothing and returns ``None``.
        """


# Custom usage (disabled): will use the get_from and check_proxy methods.
# get_proxy = ExtendedGetProxy(timeout=10, check_duplicates=False)

# Default usage

get_proxy = GetProxy(use_tqdm=True, check_duplicates=True, timeout=10, limit=5)
proxy_list = get_proxy.list  # Returns ProxyList
print(proxy_list.length)
proxy_list.all  # Returns List[Proxy]
proxy_list.filter(
)  # Optional arguments: country_code: list=None, ssl_support: bool=None, google_passed: bool=None, use_limit: int=0
proxy_list.get().to_dict(
)  # Optional arguments: country_code: list=None, ssl_support: bool=None, google_passed: bool=None, use_limit: int=0
print(proxy_list.first.to_dict())
print(proxy_list.last_used.to_dict())
print(proxy_list.next.to_dict())

get_proxy.save()  # Save the proxies