def try_proxy(proxy, url='http://gaokao.chsi.com.cn', tag=u'阳光高考'):
    """Fetch ``url`` through ``proxy`` and search the response for ``tag``.

    When the response contains a ``<title>`` element, that element is
    printed so the operator can see which page the proxy actually served.

    Args:
        proxy: Proxy specification passed to ``BasicRequests.set_proxy``.
        url: Page to request through the proxy.
        tag: Regular expression searched for in the response text.

    Returns:
        The ``re`` match object for ``tag`` when it occurs in the response
        text. ``None`` when it does not occur, or when the request yields a
        falsy response.
    """
    req = BasicRequests()
    req.set_proxy(proxy, 0, False)
    con = req.request_url(url, timeout=5)
    if not con:
        # No usable response: report "no match" instead of reading con.text.
        return None

    title = re.search(r'<title>[^<]*<\/title>', con.text)
    if title:
        print(title.group())
    return re.search(tag, con.text)
def test_proxy_speed(url, proxy, t=60):
    """Count the successful requests to ``url`` made through ``proxy``.

    Requests are issued back to back, each with a 5 second timeout, until
    more than ``t`` seconds have elapsed since the first one started.

    Args:
        url: Address requested on every iteration.
        proxy: Proxy specification passed to ``BasicRequests.set_proxy``.
        t: Length of the measurement window in seconds.

    Returns:
        The number of requests that returned a truthy response. Requests
        that raise, or that return a falsy response, are not counted.
    """
    req = BasicRequests()
    req.set_proxy(proxy, 0, False)
    started = time.time()
    count = 0
    while time.time() - started <= t:
        try:
            con = req.request_url(url, timeout=5)
        except Exception:
            # Best-effort benchmark: a request that raises simply does not
            # count towards the total.
            continue
        # A falsy response is a failed fetch as well, so it must not
        # inflate the score.
        if con:
            count += 1
    return count
def filter_with_speed(proxies, url='http://www.baidu.com', timeout=10):
    """Keep the proxies through which ``url`` can be fetched.

    Args:
        proxies: Iterable of proxy specifications to try, in order.
        url: Address requested once per proxy.
        timeout: Timeout passed to every request.

    Returns:
        A list of the proxies, in input order, whose request returned a
        truthy response without raising.
    """
    req = BasicRequests()
    usable = []
    for candidate in proxies:
        # NOTE(review): the second argument is the current length of
        # req.sp_proxies, presumably the slot the new proxy is stored in;
        # confirm against BasicRequests.set_proxy.
        slot = len(req.sp_proxies)
        req.set_proxy(candidate, slot, False)
        try:
            response = req.request_url(url, timeout=timeout)
        except Exception:
            # A request that raises disqualifies the proxy.
            continue
        if response:
            usable.append(candidate)
    return usable
def get_child_court(self, court):
    """Fetch the child courts of ``court`` into ``self.child_courts``.

    Sleeps for one second, then requests the ``GetChildAllCourt`` endpoint
    through ``self.proxy``, sending ``court['key']`` as ``keyCodeArrayStr``.
    The response is parsed with ``self.parse_results`` and every parsed
    item is added to ``self.child_courts``.

    Args:
        court: Mapping with a ``'key'`` entry identifying the parent court.

    Returns:
        None. Nothing is stored when the request yields a falsy response or
        when the response text contains ``'<'``.
    """
    req = BasicRequests()
    req.set_proxy(self.proxy)
    time.sleep(1)
    print('fetching child court %s' % (court['key'],))
    con = req.request_url(
        'http://wenshu.court.gov.cn/Index/GetChildAllCourt',
        data={'keyCodeArrayStr': court['key']})
    # A failed request yields a falsy response with no usable .text.
    # NOTE(review): a '<' in the body presumably means markup (an error
    # page) was served instead of data; confirm against the endpoint.
    if not con or '<' in con.text:
        return
    self.child_courts.extend(self.parse_results(con))
def get_court(self):
    """Fetch the courts of every province into ``self.courts``.

    For each entry of ``self.provinces`` the method sleeps one second,
    requests the ``GetCourt`` endpoint through ``self.proxy`` with the
    province name, parses the response with ``self.parse_results`` and adds
    every parsed item to ``self.courts``.

    A province whose request yields a falsy response, or whose response
    text contains ``'<'``, is reported as ``invalid response`` and skipped;
    the remaining provinces are still processed.

    Returns:
        None.
    """
    req = BasicRequests()
    req.set_proxy(self.proxy)
    for province in self.provinces:
        time.sleep(1)
        print('fetch province %s' % (province['name'],))
        con = req.request_url(
            'http://wenshu.court.gov.cn/Index/GetCourt',
            data={'province': province['name']})
        # A failed request yields a falsy response with no usable .text;
        # treat it like any other invalid response so one bad province
        # does not abort the whole run.
        if not con or '<' in con.text:
            print('invalid response')
            continue
        self.courts.extend(self.parse_results(con))
def test_proxy(proxy, url, count=10):
    """Measure the mean duration of successful requests through a proxy.

    Args:
        proxy: Dict whose ``'p'`` entry is the proxy specification passed
            to ``BasicRequests.set_proxy``. Its ``'v'`` entry is set to the
            result.
        url: Address requested on every attempt.
        count: Number of requests to attempt.

    Returns:
        None. When at least one request returns a truthy response,
        ``proxy['v']`` is set to the mean wall-clock seconds of those
        successful requests. Otherwise ``proxy['v']`` is left untouched.
    """
    rq = BasicRequests()
    rq.set_proxy(proxy['p'])
    total = 0.0
    success = 0
    for _ in range(count):
        started = time.time()
        try:
            con = rq.request_url(url)
        except Exception:
            # Catch Exception rather than everything, so that Ctrl-C and
            # SystemExit still stop the measurement.
            continue
        elapsed = time.time() - started
        if con:
            success += 1
            total += elapsed
    if success > 0:
        proxy['v'] = total / success
#!/usr/bin/env python
# -*- coding:utf8 -*-
# Manual check: fetch one judgment page through a fixed proxy and print the
# response text when a response comes back.
from spider.httpreq import BasicRequests

if __name__ == "__main__":
    # NOTE(review): the proxy string appears to embed a username and a
    # password; consider keeping such values out of version control.
    proxy_spec = '106.75.134.190:18888:ipin:ipin1234'
    page_url = 'http://www.zjsfgkw.cn/document/JudgmentDetail/4062962'

    client = BasicRequests()
    client.set_proxy(proxy_spec)
    response = client.request_url(page_url)
    if response:
        print(response.text)