import requests
from lxml import etree


def http(self, _url, _proxyHttp, _proxyHost, _proxyPort):
    proxy_meta = "%(http)s://%(host)s:%(port)s" % {
        "http": _proxyHttp,
        "host": _proxyHost,
        "port": _proxyPort,
    }
    proxies = {
        "http": proxy_meta,
        "https": proxy_meta,
    }
    try:
        # print(self.proxies)
        requests.adapters.DEFAULT_RETRIES = 3
        s = requests.session()
        s.keep_alive = False
        req = requests.get(url=_url, headers=headers, proxies=proxies, timeout=5)
        req.encoding = 'utf-8'
        if req.status_code == 200:
            html = req.text
            com_html = etree.HTML(html)
            read_count = com_html.xpath('//span[@class="read-count"]')
            if read_count:
                # Print the post's read count
                print(proxy_meta, read_count[0].text)
        else:
            print("No response from request:")
    except Exception as e:
        print("IP unavailable:", proxy_meta)
        save_log("IP unavailable:", proxy_meta, e)
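# The methods in this file reference a module-level `headers` dict and a
# `save_log` helper defined elsewhere in the repo. A minimal sketch of what
# they might look like -- the User-Agent value and log format are assumptions:
import logging

headers = {
    # A browser-like User-Agent so the target site does not reject the request outright
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
}


def save_log(*args):
    # Join all arguments into a single line and record it at INFO level
    logging.info(' '.join(str(a) for a in args))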
def request_url(self, _url, proxy):
    proxy_meta = ''
    proxies = None
    if proxy:
        proxy_meta, proxies = self.get_proxy_meta(proxy)
    try:
        s = requests.session()
        s.keep_alive = False
        req = requests.get(url=_url, headers=headers, proxies=proxies, timeout=5)
        req.encoding = 'utf-8'
        if req.status_code == 200:
            html = req.text
            com_html = etree.HTML(html)
            read_count = com_html.xpath('//span[@class="read-count"]')
            # Guard against pages without a read-count element; indexing an
            # empty result would raise and wrongly discard a working proxy.
            if read_count:
                print(proxy_meta, read_count[0].text)
        else:
            print("No response from request:")
    except Exception as e:
        # Drop the dead proxy and log which domain the failure came from
        self.remove_port(proxy)
        dom = _url.split('//')[1].split('.net')[0]
        print("IP unavailable__%s__>>" % dom, proxy_meta, e)
        save_log("IP unavailable__%s__>>" % dom, proxy_meta, e)
    finally:
        self.wait_time(2)
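# request_url relies on self.get_proxy_meta and self.remove_port, which are not
# shown in this file. Given that get_easy_json below stores each proxy as a
# "protocol#IP#port" string, a plausible sketch -- the bodies are assumptions:
def get_proxy_meta(self, proxy):
    # Assumed helper: split a "protocol#IP#port" entry into a proxy URL
    # plus a requests-style proxies dict.
    protocol, ip, port = proxy.split('#')
    proxy_meta = "%s://%s:%s" % (protocol, ip, port)
    proxies = {"http": proxy_meta, "https": proxy_meta}
    return proxy_meta, proxies


def remove_port(self, proxy):
    # Assumed helper: drop a dead proxy from the shared list, if still present.
    if proxy in self._proxy_list:
        self._proxy_list.remove(proxy)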
def scheduled_job_visit():
    '''Visit the blog.
    Schedule: runs whenever the minute is a whole multiple of 1, i.e. every minute.
    '''
    init_log(_log_path='', _log_name='scheduler.log', _filemode='a')
    save_log("_________visit start_________", now_datetime())
    KuiDaiLi().start()
    save_log("_________visit end_________\n", now_datetime())
def job_brush_flow():
    '''Brush blog traffic.
    Schedule: runs whenever the minute is a whole multiple of 1, i.e. every minute.
    '''
    init_log(_log_path='', _log_name='brush_flow.log', _filemode='a')
    save_log("_________brush start_________", now_datetime())
    mogu = MoGuRequest()
    mogu.start(_type='mogu')
    save_log("_________brush end_________\n", now_datetime())
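# The docstrings above say the jobs fire on every whole minute. The repo's
# actual scheduler wiring is not shown here; a minimal sketch using
# APScheduler (a hypothetical choice -- the project may use something else):
from apscheduler.schedulers.blocking import BlockingScheduler

scheduler = BlockingScheduler()
# A cron trigger with minute='*' fires at the top of every minute.
scheduler.add_job(scheduled_job_visit, 'cron', minute='*')
scheduler.add_job(job_brush_flow, 'cron', minute='*')
scheduler.start()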
def get_easy_json(self, _url=easy_url, proxy=''):
    headers['Accept'] = 'application/json; charset=utf-8'
    proxy_meta = ''
    proxies = None
    _url_type = ''
    if proxy:
        proxy_meta, proxies = self.get_proxy_meta(proxy)
    try:
        req = requests.get(_url, headers=headers, proxies=proxies, timeout=5)
        # print("json>>", req.text)
        if req.status_code == 200:
            if 'amap.com' in _url:
                _url_type = 'AMap'
                jsonObj = req.json()
                name = ''
                if jsonObj.get("data"):
                    name = jsonObj.get('data').get("base").get("name")
                elif jsonObj.get("poi_list"):
                    name = jsonObj.get("poi_list")[0].get("name")
                print("JSON fetched__%s__>>" % _url_type, proxy_meta, name)
            elif 'baidu' in _url:
                _url_type = 'Baidu Maps'
                name = req.json().get("result").get("what")
                print("JSON fetched__%s__>>" % _url_type, proxy_meta, name)
            else:
                # easy-mock: collect each proxy into the list as a
                # "protocol#IP#port" string, defaulting to https.
                _url_type = 'easy-mock'
                for ips in req.json().get("data"):
                    protocol = 'https'
                    if ips.get("Protocol"):
                        protocol = ips.get("Protocol")
                    self._proxy_list.append(protocol + '#' + ips.get("IP") + '#' + str(ips.get("Port")))
        else:
            print("No response from request:")
    except Exception as e:
        if _url_type:
            print("JSON parse error__%s__>>" % _url_type, proxy_meta, e)
            save_log("JSON parse error__%s__>>:" % _url_type, proxy_meta, e)
        else:
            dom = _url.split('//')[1].split('.com')[0]
            print("IP unavailable__%s__>>" % dom, proxy_meta, e)
    finally:
        self.wait_time(2)
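# For reference, the easy-mock branch above implies a response of roughly this
# shape. Field names are taken from the parsing code; the values are invented:
example_response = {
    "data": [
        {"IP": "1.2.3.4", "Port": 8080, "Protocol": "http"},
        {"IP": "5.6.7.8", "Port": 3128},  # no Protocol key -> defaults to https
    ]
}
# Each entry becomes e.g. "http#1.2.3.4#8080" in self._proxy_list.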
def e(self, info):
    self.err_log.error(info)

def c(self, info):
    self.err_log.critical(info)


# test_log = TestLog("test_log.log")
#
#
# def d(info):
#     test_log.logger.debug(info)

# def test():
#     MyLog.info('This is info')
#     MyLog.warning('This is warning')
#     MyLog.error('This is error')


if __name__ == '__main__':
    # Test 1:
    # d("dddd")

    # Test 2:
    # log = TestLog(console=True)
    # log.d("haha")
    # log.e("error")

    # Test 3: can save to a fixed location, but the log's caller
    # location shows up as logger/logger.py
    # test()
    # logging.error("output 12")
    save_log(1, '2')
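# The e/c methods and the commented-out tests above assume a TestLog class
# with an err_log logger. A minimal sketch consistent with those call sites;
# the constructor signature and handler setup are guesses, not the repo's code:
import logging

class TestLog:
    def __init__(self, log_name='test_log.log', console=False):
        self.err_log = logging.getLogger(log_name)
        self.err_log.setLevel(logging.DEBUG)
        self.err_log.addHandler(logging.FileHandler(log_name))
        if console:
            # Optionally mirror log records to stderr as well
            self.err_log.addHandler(logging.StreamHandler())

    def d(self, info):
        self.err_log.debug(info)

    def e(self, info):
        self.err_log.error(info)

    def c(self, info):
        self.err_log.critical(info)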