def freeProxySecond(proxy_number=100):
        """
        Crawl the 66ip proxy list (http://www.66ip.cn/).

        Requests the site's export endpoint, asking for ``proxy_number``
        entries, and scans the response text for ``ip:port`` shaped tokens.

        :param proxy_number: number of proxies requested from the site
        :return: generator yielding each matched ``ip:port`` string
        """

        url_template = ("http://www.66ip.cn/mo.php?sxb=&tqsl={}&port=&export=&"
                        "ktip=&sxa=&submit=%CC%E1++%C8%A1&textarea=")
        ip_port_pattern = r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{1,5}'

        # .text is the response body as a string (per the original comment).
        page_text = WebRequest().get(url_template.format(proxy_number)).text
        for match in re.finditer(ip_port_pattern, page_text):
            yield match.group()
    def freeProxySixth():
        """
        Crawl the xdaili free proxy list.

        Fetches http://www.xdaili.cn/ipagent/freeip/getFreeIps?page=1&rows=10,
        decodes the JSON response and yields one ``ip:port`` string for each
        entry under ``RESULT -> rows``.

        The crawl is best-effort: a failed request, an undecodable body or an
        unexpected payload layout ends the generator without raising.
        Interpreter-level signals (KeyboardInterrupt, SystemExit,
        GeneratorExit) are no longer swallowed.

        :return: generator yielding ``ip:port`` strings
        """

        url = 'http://www.xdaili.cn/ipagent/freeip/getFreeIps?page=1&rows=10'

        # Keep the fetch/parse step separate from the yield so that exceptions
        # raised by (or thrown in from) the consumer are never masked here.
        try:
            payload = WebRequest().get(url).json()
            rows = list(payload['RESULT']['rows'])
        except Exception:
            # Best-effort source: any fetch or parse failure means "no proxies".
            return

        for row in rows:
            try:
                proxy = '{}:{}'.format(row['ip'], row['port'])
            except (KeyError, TypeError):
                # Malformed entry: stop here, keeping whatever was already
                # yielded (same outcome as the original catch-all handler).
                return
            yield proxy
Beispiel #3
0
 def get_response(self, company_name, qyxx_id=None, pageNum = 1):
     """
     Fill in the stored request payload and POST it as JSON.

     Updates ``self.post_data`` in place (company name, company id and page
     number), waits 0.3 seconds, sends the serialized payload to
     ``self.request_url`` and returns the decoded JSON response.

     :param company_name: value written to both the nested request name and
         the top-level ``companyName`` field
     :param qyxx_id: value written to the ``companyId`` field
     :param pageNum: value written to the nested ``pageNum`` field
     :return: the response body parsed with ``json.loads``
     """
     self.post_data['trdDataRequest'].update({'name': company_name, 'pageNum': pageNum})
     self.post_data.update({'companyName': company_name, 'companyId': qyxx_id})

     # Fixed pause before every request.
     time.sleep(0.3)

     body = json.dumps(self.post_data)
     response = WebRequest().post_data_json(self.request_url, data=body)
     return json.loads(response.text)
def getHtmlTree(url):
    """
    Download a page and parse it into an HTML tree.

    Sends a GET request with a fixed set of browser-like headers, after a
    two second pause, and passes the raw response content to ``etree.HTML``.

    :param url: address of the page to fetch
    :return: the result of ``etree.HTML`` applied to the response content
    """
    browser_headers = {
        'Connection': 'keep-alive',
        'Cache-Control': 'max-age=0',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko)',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8',
    }

    client = WebRequest()
    # Fixed delay before each fetch.
    time.sleep(2)
    response = client.get(url=url, header=browser_headers)
    return etree.HTML(response.content)
Beispiel #5
0
 def test_execute_with_success_response(self, mock_urlopen):
     # Arrange: the patched urlopen reports HTTP 200.
     mock_urlopen.return_value.status = 200

     # Act: run the request against the mocked transport.
     request = WebRequest("http://www.google.com")
     outcome = request.execute()

     # Assert: a 200 status is reported as success.
     self.assertEqual(outcome, "SUCCESS")
Beispiel #6
0
 def test_execute_with_failure_response(self, mock_urlopen):
     # Arrange: the patched urlopen reports HTTP 404.
     mock_urlopen.return_value.status = 404

     # Act: run the request against the mocked transport.
     request = WebRequest("http://www.google.com")
     outcome = request.execute()

     # Assert: a 404 status is reported as failure.
     self.assertEqual(outcome, "FAILURE")