class ProxyRefreshSchedule(ProxyManager):
    """Scheduled refresh of the proxy pool.

    Validates the proxies waiting in ``raw_proxy_queue`` and promotes the
    working ones to ``useful_proxy_queue``.
    """

    def __init__(self):
        ProxyManager.__init__(self)
        # Assigned after the base initialiser so this logger is the one kept.
        self.log = LogHandler('refresh_schedule')

    def validProxy(self):
        """Validate the proxies in the raw queue and store the usable ones.

        Proxies are popped from ``raw_proxy_queue`` one at a time until
        ``pop()`` returns a falsy value.  A proxy is written to
        ``useful_proxy_queue`` only when it is not already known and
        ``validUsefulProxy`` accepts it.

        :return: None
        """
        self.log.info('%s start validProxy_a' % time.ctime())

        # Take the "already stored" snapshot from the USEFUL queue.  The
        # previous code called getAll() while the raw queue was selected, so
        # the duplicate check never compared against the stored proxies.
        self.db.changeTable(self.useful_proxy_queue)
        exist_proxy = self.db.getAll()
        # Proxies promoted during this run (the snapshot above is not live).
        promoted = set()

        self.db.changeTable(self.raw_proxy_queue)
        raw_proxy = self.db.pop()
        while raw_proxy:
            # Cheap membership test first: skip the validation call entirely
            # for proxies that are already known.
            is_new = (raw_proxy not in exist_proxy) and (raw_proxy not in promoted)
            if is_new and validUsefulProxy(raw_proxy):
                self.db.changeTable(self.useful_proxy_queue)
                self.db.put(raw_proxy)
                promoted.add(raw_proxy)
                self.log.info('validProxy_a: %s validation pass' % raw_proxy)
            else:
                self.log.debug('validProxy_a: %s validation fail' % raw_proxy)
            # put() may have switched tables; go back to the raw queue.
            self.db.changeTable(self.raw_proxy_queue)
            raw_proxy = self.db.pop()
        self.log.info('%s validProxy_a complete' % time.ctime())
def __init__(self, dbtype='sqlit'):
    """Open the SQLite database described by the ``dbtype`` configuration.

    :param dbtype: name of the database configuration section to load
    """
    self.log = LogHandler("db")
    settings = DBConfig().get_db_config(dbtype)
    # The database lives under <parent dir of CURRENT_PATH>/<path>/<dbname>.
    project_dir = os.path.dirname(os.path.abspath(CURRENT_PATH))
    db_file = os.path.join(project_dir, settings.get('path'),
                           settings.get("dbname"))
    print(db_file)
    self.conn = sqlite3.connect(db_file)
    self.c = self.conn.cursor()
def __init__(self, dbtype):
    """Load the configuration for ``dbtype`` and connect when it is MySQL.

    :param dbtype: database type; a connection is opened only for "mysql"
    """
    self.log = LogHandler("db")
    settings = DBConfig().get_db_config(dbtype)
    if dbtype != "mysql":
        # No connection is created for any other database type.
        return
    self.connection = pymysql.connect(**settings)
def __init__(self, urltype):
    """Select the listing URL and set up the crawler's collaborators.

    :param urltype: 0 - domestic high-anonymity proxies; 1 - domestic
        transparent proxies; 2 - domestic HTTPS proxies; 3 - foreign
        high-anonymity proxies
    """
    url_list = {
        0: 'http://www.pcdaili.com/index.php?m=daili&a=free&type=1',
        1: 'http://www.pcdaili.com/index.php?m=daili&a=free&type=2',
        2: 'http://www.pcdaili.com/index.php?m=daili&a=free&type=3',
        3: 'http://www.pcdaili.com/index.php?m=daili&a=free&type=4',
    }
    supported = (0, 1, 2, 3)
    if urltype in supported:
        # ``self.url`` is left unset for any other value.
        self.url = url_list[urltype]
    self.ua = UserAgent()
    self.sqlite = SqliteClient()
    self.sqlite.create_table_sqlite()
    self.log = LogHandler("db")
class ProxyManager(object):
    """Front end to the proxy store: fetch, hand out, delete and count proxies."""

    def __init__(self):
        self.raw_proxy_queue = 'raw_proxy'
        self.useful_proxy_queue = 'useful_proxy'
        self.db = DbClient()
        self.config = GetConfig()
        self.log = LogHandler('proxy_manager')

    def refresh(self):
        """Run every configured getter and store the results in the raw queue."""
        for getter_name in self.config.proxy_getter_functions:
            fetch = getattr(GeteFreeProxy, getter_name.strip())
            fetched = set()
            for candidate in fetch():
                if not candidate.strip():
                    continue
                self.log.info('{func}: fetch proxy {proxy}'.format(
                    func=getter_name, proxy=candidate))
                fetched.add(candidate.strip())
            # Store this getter's de-duplicated results.
            self.db.changeTable(self.raw_proxy_queue)
            for item in fetched:
                self.db.put(item)

    def get(self):
        """Return whatever ``db.get()`` yields for the useful queue."""
        store = self.db
        store.changeTable(self.useful_proxy_queue)
        return store.get()

    def delete(self, proxy):
        """Delete ``proxy`` from the useful queue."""
        store = self.db
        store.changeTable(self.useful_proxy_queue)
        store.delete(proxy)

    def getAll(self):
        """Return whatever ``db.getAll()`` yields for the useful queue."""
        store = self.db
        store.changeTable(self.useful_proxy_queue)
        return store.getAll()

    def get_status(self):
        """Return the ``db.get_status()`` value of both queues in a dict."""
        report = {}
        # Raw queue first, then useful queue: same call order as before.
        for key, queue in (('raw_proxy', self.raw_proxy_queue),
                           ('useful_proxy', self.useful_proxy_queue)):
            self.db.changeTable(queue)
            report[key] = self.db.get_status()
        return report
class ProxyValidSchedule(ProxyManager):
    """Endless re-validation of the proxies stored in the useful queue."""

    def __init__(self):
        ProxyManager.__init__(self)
        self.log = LogHandler('valid_schedule')

    def __validProxy(self):
        """Re-check every stored proxy and keep a per-proxy score.

        A passing check adds 1 to the proxy's counter, a failing check
        subtracts 1; a proxy whose counter drops below -5 is deleted.
        The loop never terminates.

        :return:
        """
        while True:
            self.db.changeTable(self.useful_proxy_queue)
            for stored in self.db.getAll():
                # Entries may come back as bytes; work with text from here on.
                proxy = stored.decode('utf-8') if isinstance(stored, bytes) else stored
                if not validUsefulProxy(proxy):
                    # Failure: decrement the counter.
                    self.db.inckey(proxy, -1)
                    self.log.info(
                        'validProxy_b: {} validation fail'.format(proxy))
                else:
                    # Success: increment the counter.
                    self.db.inckey(proxy, 1)
                    self.log.debug(
                        'validProxy_b: {} validation pass'.format(proxy))
                score = self.db.getvalue(proxy)
                too_many_failures = bool(score) and int(score) < -5
                if too_many_failures:
                    # Counter fell below -5: drop the proxy.
                    self.db.delete(proxy)
            self.log.info('validProxy_a running normal')

    def main(self):
        """Entry point: start the validation loop."""
        self.__validProxy()
def __init__(self):
    """Create the logger, the HTTP session and the default request headers."""
    self.log = LogHandler('NeteaseApi')
    self.session = requests.session()
    default_headers = {
        "Accept": "*/*",
        "Accept-Encoding": "gzip,deflate,sdch",
        "Accept-Language": "zh-CN,zh;q=0.8,gl;q=0.6,zh-TW;q=0.4",
        "Connection": "keep-alive",
        "Content-Type": "application/x-www-form-urlencoded",
        "Host": "music.163.com",
        "Referer": "http://music.163.com",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36"
    }
    self.header = default_headers
Date : 2019/12/1
-------------------------------------------------
"""
import platform

from flask import Flask, request

from common.Response import Response
from config.Getter import config
from service.MusicooService import MusicooService
from util.LogHandler import LogHandler

# Flask application object on which the route decorators below register.
app = Flask(__name__)
log = LogHandler('Musicoo')


@app.route('/', methods=['GET'])
def index():
    """Handle ``GET /`` and return the literal string ``'index'``."""
    return 'index'


@app.route('/netease/song/<song_id>/url', methods=['GET'])
def song_url(song_id):
    """
    Get the music link, e.g. /netease/song/1379444316/url

    :param song_id: song id captured from the URL path
    :return:
    """
--------------------------------------------
"""
__author__ = 'wanglin'

import datetime
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart

# Project-local modules
from jinja2html import create_html
from monitorSpider import get_info
from util.LogHandler import LogHandler
from util.DBManager import get_table_count

log = LogHandler('mailsend')

# Module-level mail settings; the subject embeds today's date (YYYY-MM-DD),
# computed once at import time.
_sender_address = '*****@*****.**'
_reciver_address = '*****@*****.**'
_subject = u'平台报告-【%s】' % datetime.datetime.now().strftime('%Y-%m-%d')
_passwd = 'passwd'
_smtpadd = 'smtp.ruifucredit.com'


def sendMail(sender, reciver, subject, content, passwd, smtpadd):
    """Prepare the report e-mail message.

    NOTE(review): only the message set-up is visible in this part of the
    file; how the message is completed and delivered must be confirmed
    against the rest of the function.

    :param sender: sender address; also used as the login user name
    :param reciver: presumably the recipient address - TODO confirm
    :param subject: value stored in the ``Subject`` header
    :param content: HTML text of the message
    :param passwd: password kept alongside the user name
    :param smtpadd: presumably the SMTP server address - TODO confirm
    """
    log.info('Start to initialize the mail message.')
    username = sender
    password = passwd
    msg = MIMEMultipart('related')
    msg['Subject'] = subject
    # HTML format
    html = content
class NetEase(object):
    """Small client for the NetEase Cloud Music ``weapi`` endpoints."""

    def __init__(self):
        """Build the default headers, the HTTP session and the logger."""
        self.header = {
            "Accept": "*/*",
            "Accept-Encoding": "gzip,deflate,sdch",
            "Accept-Language": "zh-CN,zh;q=0.8,gl;q=0.6,zh-TW;q=0.4",
            "Connection": "keep-alive",
            "Content-Type": "application/x-www-form-urlencoded",
            "Host": "music.163.com",
            "Referer": "http://music.163.com",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36"
        }
        self.session = requests.session()
        self.log = LogHandler('NeteaseApi')

    def _raw_request(self, method, url, data=None):
        """Send the HTTP request.

        :param method: "POST" or "GET"
        :param url: absolute url
        :param data: query parameters (GET) or form data (POST)
        :return: the response object returned by the session
        :raises ValueError: if ``method`` is neither "GET" nor "POST"
            (previously this surfaced as an ``UnboundLocalError``)
        """
        if method == "GET":
            return self.session.get(url,
                                    params=data,
                                    headers=self.header,
                                    timeout=DEFAULT_TIMEOUT)
        if method == "POST":
            return self.session.post(url,
                                     data=data,
                                     headers=self.header,
                                     timeout=DEFAULT_TIMEOUT)
        raise ValueError("unsupported HTTP method: {!r}".format(method))

    def _get_form_data(self, encrypt_data):
        """Encrypt the payload into the form fields expected by the API.

        :param encrypt_data: payload to encrypt
        :return: ``{"params": ..., "encSecKey": ...}``
        """
        key = netease.create_key(16)
        return {
            "params": netease.aes(netease.aes(encrypt_data, netease.NONCE),
                                  key),
            "encSecKey": netease.rsa(key, netease.PUBKEY, netease.MODULUS)
        }

    def request(self, method, path, data=None, default=None):
        """Encrypt ``data``, call the API and return the decoded JSON.

        :param method: "POST" or "GET"
        :param path: path appended to ``BASE_URL``
        :param data: unencrypted payload; it is copied, never modified
        :param default: value returned when the call fails; defaults to
            ``{"code": -1}``
        :return: decoded JSON response, or ``default`` when the request
            fails, the method is unsupported or the body is not valid JSON
        """
        url = "{}{}".format(BASE_URL, path)
        # Fresh objects per call: the old mutable defaults were shared between
        # calls and ``data.update`` modified the caller's dict.
        fallback = {"code": -1} if default is None else default
        payload = dict(data) if data else {}
        payload["csrf_token"] = ""
        params = self._get_form_data(json.dumps(payload).encode('utf-8'))

        raw = None
        try:
            self.log.debug(
                '[Netease api] url: {};\trequest data: {};\tparams: {}'.
                format(url, payload, params))
            raw = self._raw_request(method, url, params)
            result = raw.json()
        except requests.exceptions.RequestException as e:
            self.log.error('[Netease api] request error: {}'.format(e))
            return fallback
        except ValueError:
            # Body was not JSON (or the method was unsupported).  Return the
            # fallback instead of leaking the raw response object to callers.
            body = raw.text[:200] if raw is not None else ''
            self.log.error(
                "[Netease api] request error; Path: {}, response: {}".format(
                    path, body))
            return fallback

        self.log.debug('[Netease api] url: {};\tresponse data: {}'.format(
            url, result))
        return result

    def songs_url(self, song_id):
        """Get the real (external) url of a song.

        Payload shape:
        {ids: "[514235010]", level: "standard", encodeType: "aac", csrf_token: ""}

        :param song_id: song id
        :return: decoded JSON containing the external link
        """
        path = "/weapi/song/enhance/player/url/v1?csrf_token="
        params = {
            'ids': '[' + str(song_id) + ']',
            'level': 'standard',
            'encodeType': 'aac',
            'csrf_token': ''
        }
        return self.request(POST, path, params)

    def songs_lyric(self, song_id):
        """Get the lyrics of a song.

        Payload shape: {id: "186453", lv: -1, tv: -1, csrf_token: ""}

        :param song_id: song id
        :return: decoded JSON response
        """
        path = "/weapi/song/lyric?csrf_token="
        params = {'id': str(song_id), 'lv': -1, 'tv': -1, 'csrf_token': ''}
        return self.request(POST, path, params)

    def songs_search(self, keyword, offset=0, limit=30):
        """Search songs by keyword; the usual search entry point.

        :param keyword: search text
        :param offset: index of the first result
        :param limit: maximum number of results
        :return: decoded JSON response
        """
        path = '/weapi/cloudsearch/get/web?csrf_token='
        params = {
            'csrf_token': '',
            'hlposttag': '</span>',
            'hlpretag': '<span class="s-fc7">',
            'limit': str(limit),
            'offset': str(offset),
            's': str(keyword),
            'total': 'true',
            'type': '1'
        }
        return self.request(POST, path, params)

    def songs_search_(self, song):
        """Search songs through the search-box suggestion API (rarely used).

        Payload shape: {s: "...", limit: "8", csrf_token: ""}

        :param song: search text
        :return: decoded JSON response
        """
        path = "/weapi/search/suggest/web?csrf_token="
        params = {'s': str(song), 'limit': 8, 'csrf_token': ''}
        return self.request(POST, path, params)

    def songs_detail(self, song_id):
        """Get the details of one song.

        :param song_id: required, song id
        :return: decoded JSON response
        """
        path = "/weapi/v3/song/detail?csrf_token="
        params = {
            'id': str(song_id),
            # NOTE(review): this value uses single quotes, so it is not strict
            # JSON; left unchanged - confirm what the endpoint accepts.
            'c': "[{'id': " + str(song_id) + "}]",
            'csrf_token': ''
        }
        return self.request(POST, path, params)
def __init__(self):
    """Run the ``ProxyManager`` initialiser, then install a dedicated logger."""
    ProxyManager.__init__(self)
    # Assigned after the base initialiser so this is the logger that stays on
    # the instance.
    self.log = LogHandler('refresh_schedule')
def __init__(self):
    """Set the queue names and create the store, config and logger objects."""
    # Names of the two tables this manager switches between.
    self.raw_proxy_queue = 'raw_proxy'
    self.useful_proxy_queue = 'useful_proxy'
    self.db = DbClient()
    self.config = GetConfig()
    self.log = LogHandler('proxy_manager')
class DBConfig(object):
    """Read and write the settings stored in ``config.ini``."""

    # Option names whose values must never be written to the log.
    _SECRET_KEYS = ("password", "passwd")

    def __init__(self, ):
        self.config = ConfigParser()
        self.name = "config.ini"
        self.sql_path = os.path.join(ROOT_PATH, self.name)
        self.log = LogHandler("db")

    def _save(self):
        """Write the in-memory configuration back to ``self.sql_path``."""
        with open(self.sql_path, "w", encoding="utf8") as f:
            self.config.write(f)

    def add_db_config(self, dbtype, host, port, user, password, database,
                      charset):
        """Add or update a database section in the configuration file.

        :param dbtype: database type (used as the section name)
        :param host: host
        :param port: port (str or int)
        :param user: user name
        :param password: password
        :param database: database name
        :param charset: character set
        :return: True once the file has been written
        """
        self.config.read(self.sql_path, encoding="utf-8")
        if dbtype not in self.config:
            self.config.add_section(dbtype)
        settings = (
            ("host", host),
            ("port", port),
            ("user", user),
            ("password", password),
            ("database", database),
            ("charset", charset),
        )
        for key, value in settings:
            # ConfigParser rejects non-string values (e.g. an int port).
            self.config.set(dbtype, key, str(value))
        self._save()
        # The password is masked so it does not end up in the log file.
        self.log.info(
            "Amend the success , Modifying the data %s" %
            [dbtype, host, port, user, "******", database, charset])
        return True

    def get_db_config(self, dbtyep):
        """Return the settings of one database section.

        :param dbtyep: database type (section name)
        :return: dict of options with ``port`` converted to int, or None
            when the section does not exist
        """
        self.config.read(self.sql_path, encoding="utf-8")
        if dbtyep not in self.config:
            self.log.error("Parameter error %s" % dbtyep)
            return None
        option = dict(self.config.items(dbtyep))
        if "port" in option:
            option["port"] = int(option["port"])
        # Log a copy with secrets masked; the returned dict is untouched.
        loggable = {
            k: ("******" if k in self._SECRET_KEYS else v)
            for k, v in option.items()
        }
        self.log.info("success %s" % loggable)
        return option

    def update_config(self, section, option, value):
        """Change the value of an existing option.

        :param section: section name
        :param option: key to change
        :param value: new value
        :return: True when the file was written; None when the section or
            the option does not exist
        """
        self.config.read(self.sql_path, encoding="utf-8")
        if section not in self.config.sections():
            self.log.error("Parameter error %s" % section)
            return None
        if option not in self.config.options(section):
            self.log.error("Parameter error %s" % option)
            return None
        self.config.set(section, option, str(value))
        self.log.info("Need to be modified")
        self._save()
        self.log.info("Amend the success")
        return True

    def add_config(self, section, option, value):
        """Create or overwrite an option, creating the section when needed.

        :param section: section name
        :param option: key to set
        :param value: value to store
        :return: True once the file has been written
        """
        self.config.read(self.sql_path, encoding="utf-8")
        if section not in self.config.sections():
            self.config.add_section(section)
        self.config.set(section, option, str(value))
        self._save()
        self.log.info("Amend the success")
        return True
def __init__(self):
    """Run the ``ProxyManager`` initialiser, then install a dedicated logger."""
    ProxyManager.__init__(self)
    # Assigned after the base initialiser so this is the logger that stays on
    # the instance.
    self.log = LogHandler('valid_schedule')
--------------------------------------------
"""
__author__ = 'wanglin'

import os
import json
import cx_Oracle
import MySQLdb
from DBUtils.PooledDB import PooledDB
from util.ConfigHandler import ConfigHandler
from util.Singleton import Singleton
from util.LogHandler import LogHandler

# Set for the whole process at import time, before any connection is opened.
os.environ['NLS_LANG'] = 'SIMPLIFIED CHINESE_CHINA.UTF8'

log = LogHandler('DBManager')


class DBManager(object):
    """Holds a ``PooledDB`` connection pool for either Oracle or MySQL."""

    # NOTE(review): the ``__metaclass__`` attribute is only honoured by
    # Python 2; on Python 3 it is an ordinary class attribute - confirm the
    # target interpreter.
    __metaclass__ = Singleton

    def __init__(self, option='oracle'):
        """Create the connection pool for the selected backend.

        :param option: configuration name passed to ``ConfigHandler().get``;
            'oracle' builds a cx_Oracle pool, any other value builds a
            MySQLdb pool
        """
        self.option = option
        self.config = ConfigHandler().get(self.option)
        if option == 'oracle':
            # cx_Oracle is given a single "host:port/db" DSN string.
            dsn = '{host}:{port}/{db}'.format(host=self.config['host'], port=self.config['port'],
                                              db=self.config['db'])
            connKwargs = {'user': self.config['user'], 'password': self.config['passwd'], 'dsn': dsn}
            self._pool = PooledDB(cx_Oracle, mincached=2, maxcached=2, maxshared=5, maxconnections=10, **connKwargs)
        else:
            # MySQLdb takes the connection parts separately; port is cast to int.
            connKwargs = {'host': self.config['host'], 'port': int(self.config['port']),
                          'user': self.config['user'],
                          'passwd': self.config['passwd'], 'db': self.config['db'],
                          'charset': self.config['charset']}
            self._pool = PooledDB(MySQLdb, mincached=2, maxcached=2, maxshared=5, maxconnections=10, **connKwargs)
class SqliteClient(object):
    """Minimal SQLite access layer for the ``ipdaili`` proxy table."""

    def __init__(self, dbtype='sqlit'):
        """Open the SQLite database described by the ``dbtype`` configuration.

        :param dbtype: name of the database configuration section to load
        :raises ValueError: if no configuration exists for ``dbtype``
        """
        self.log = LogHandler("db")
        # Defined before anything can fail so that __del__ is always safe.
        self.conn = None
        self.c = None

        db_config = DBConfig().get_db_config(dbtype)
        if db_config is None:
            raise ValueError(
                "no database configuration found for %r" % (dbtype,))
        root_path = os.path.join(
            os.path.dirname(os.path.abspath(CURRENT_PATH)),
            db_config.get('path'))
        db_path = os.path.join(root_path, db_config.get("dbname"))
        # Reported through the logger instead of a stray print().
        self.log.debug("sqlite database path: %s" % db_path)
        self.conn = sqlite3.connect(db_path)
        self.c = self.conn.cursor()

    def create_table_sqlite(self):
        """Create the ``ipdaili`` table when it does not exist yet.

        :return: True on success, False when the statement failed
        """
        sql = "create table if not exists ipdaili(ip_addr TEXT, ip_port TEXT, type TEXT,ip_proxy TEXT, Downloadtime TEXT)"
        try:
            self.c.execute(sql)
            self.conn.commit()
        except Exception as e:
            self.log.error(e)
            return False
        self.log.info("create success")
        return True

    def insert_table_sqlite(self, ip_addr, ip_port, type, ip_proxy):
        """Insert one proxy row, stamped with the current local time.

        :param ip_addr: ip address
        :param ip_port: port
        :param type: proxy type
        :param ip_proxy: full proxy url
        :return: True on success, False when the insert failed
        """
        downloadtime = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        try:
            self.c.execute(
                "INSERT INTO ipdaili (ip_addr,ip_port,type,ip_proxy,Downloadtime) VALUES (?,?,?,?,?)",
                (ip_addr, ip_port, type, ip_proxy, downloadtime))
            self.conn.commit()
        except Exception as e:
            self.log.error(e)
            return False
        self.log.info("insert success")
        return True

    def search_table_sqlite(self, sql="select * from ipdaili"):
        """Execute ``sql`` and return every row.

        :param sql: statement to execute
        :return: list of rows, or False when the statement failed
        """
        try:
            rows = self.c.execute(sql).fetchall()
            self.conn.commit()
        except Exception as e:
            self.log.error(e)
            return False
        self.log.info("search success")
        return rows

    def __del__(self):
        """Close the connection if one was opened."""
        # getattr: the attribute is missing on instances whose __init__ never
        # ran; None means the connection was never established.
        conn = getattr(self, "conn", None)
        if conn is not None:
            conn.close()
class MysqlCline(object):
    """Minimal MySQL access layer for the ``ipdaili`` proxy table."""

    def __init__(self, dbtype):
        """Load the configuration and connect when ``dbtype`` is "mysql".

        :param dbtype: database type
        """
        self.log = LogHandler("db")
        # Always defined, so the other methods and __del__ fail gracefully
        # when no connection was opened.
        self.connection = None
        dbconfig = DBConfig().get_db_config(dbtype)
        if dbtype == "mysql":
            self.connection = pymysql.connect(**dbconfig)

    def create_table_mysql(self):
        """Create the ``ipdaili`` table when it does not exist yet.

        :return: True on success, False when the statement failed
        """
        sql = """CREATE TABLE IF NOT EXISTS ipdaili (
            ip_addr varchar(30) DEFAULT NULL,
            ip_port varchar(11) DEFAULT NULL,
            type varchar(10) DEFAULT NULL,
            Downloadtime varchar(30) DEFAULT NULL
        )"""
        cursor = None
        try:
            cursor = self.connection.cursor()
            cursor.execute(sql)
            self.connection.commit()
        except Exception as e:
            # The old ``finally: return True`` hid this failure from callers.
            self.log.error(e)
            return False
        else:
            self.log.info("create success")
            return True
        finally:
            if cursor is not None:
                cursor.close()

    def insert_table_mysql(self, ip_addr, ip_port, type):
        """Insert one proxy row, stamped with the current local time.

        :param ip_addr: ip address
        :param ip_port: port
        :param type: proxy type
        :return: True on success, False when the insert failed
        """
        downloadtime = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        # Values are passed as parameters, never spliced into the statement,
        # so scraped text cannot alter the SQL.
        sql = "INSERT INTO ipdaili VALUES (%s, %s, %s, %s)"
        cursor = None
        try:
            # A ``with`` block is deliberately not used here (see the
            # original author's note); the cursor is closed in ``finally``.
            cursor = self.connection.cursor()
            cursor.execute(sql, (ip_addr, ip_port, type, downloadtime))
            self.connection.commit()
        except Exception as e:
            self.log.error(e)
            return False
        else:
            self.log.info("insert success")
            return True
        finally:
            if cursor is not None:
                cursor.close()

    def search_table_mysql(self, sql="select * from ipdaili"):
        """Execute ``sql`` and return every row.

        :param sql: query to execute
        :return: fetched rows, or False when the query failed
        """
        cursor = None
        try:
            cursor = self.connection.cursor()
            cursor.execute(sql)
            res = cursor.fetchall()
        except Exception as e:
            # The old ``finally: return res`` raised UnboundLocalError here.
            self.log.error(e)
            return False
        else:
            self.log.info("search success")
            return res
        finally:
            if cursor is not None:
                cursor.close()

    def __del__(self):
        """Close the database connection if one was opened."""
        connection = getattr(self, "connection", None)
        if connection is not None:
            connection.close()
File Name: monitorSpider
Description:
Author: wanglin
Date: 2017/12/28
--------------------------------------------
Change Activity:2017/12/28;
--------------------------------------------
"""
__author__ = 'wanglin'

import json
import requests
import datetime
from util.LogHandler import LogHandler

log = LogHandler('monitorSpider')

# Template dicts whose values all start as the placeholder '-'.
name = {'Total': '-',
        'Used': '-',
        'Free': '-',
        'PercentUsed': '-',
        'TotalBlocks': '-',
        'TotalFiles': '-',
        'SoftwareVersion': '-'}

node = {'name': '-',
        'lastContact': '-',
        'xferaddr': '-',
        'adminState': '-',
        'capacity': '-',
        'usedSpace': '-',
        'blockPoolUsedPercent': '-',
        'version': '-',
        }


def get_info():
    """Request the NameNodeInfo JMX endpoint.

    NOTE(review): only the request itself is visible in this part of the
    file; how the response is used must be confirmed against the rest of
    the function.
    """
    url = 'http://192.168.88.1:50070/jmx?qry=Hadoop:service=NameNode,name=NameNodeInfo'
    try:
        r = requests.get(url=url)
    except Exception as ex:
        # A failed request is only printed; ``r`` stays unassigned then.
        print(ex)
def test_log_handler():
    """Smoke test: log once, then rename the logger twice and log each time."""
    log = LogHandler("Tlog")
    log.info("test log")
    renames = (
        ("test1", 'this is a log from test1'),
        ('test2', 'this is a log from test2'),
    )
    for new_name, message in renames:
        log.resetName(new_name)
        log.info(message)
"""
-------------------------------------------
   File Name: jinja2html
   Description:
   Author: wanglin
   Date: 2017/12/28
--------------------------------------------
   Change Activity:2017/12/28;
--------------------------------------------
"""
__author__ = 'wanglin'

import os
from jinja2 import Environment, FileSystemLoader
from util.LogHandler import LogHandler

log = LogHandler('jinja2html')

PATH = os.path.dirname(os.path.abspath(__file__))
# Templates are loaded from the "templates" directory next to this module.
TEMPLATE_ENVIRONMENT = Environment(
    loader=FileSystemLoader(os.path.join(PATH, 'templates')),
    autoescape=False,
    trim_blocks=False)


def create_html(nameinfo, datainfo, tableinfo):
    """Render ``base.html`` with the three report sections.

    :param nameinfo: value exposed to the template as ``info``
    :param datainfo: value exposed to the template as ``datainfo``
    :param tableinfo: value exposed to the template as ``tableinfo``
    :return: the rendered page as a string
    """
    context = dict(info=nameinfo, datainfo=datainfo, tableinfo=tableinfo)
    log.info('Jinja1 context info: {}'.format(context))
    template = TEMPLATE_ENVIRONMENT.get_template('base.html')
    html = template.render(context)
    log.info('Successful rendering report page. ')
    return html
class IpSpider(object):
    """Scrape free proxies from pcdaili.com, test them and store them in SQLite."""

    # Seconds to wait for a single HTTP request.  Without a timeout one dead
    # host or proxy can block the crawler indefinitely.
    REQUEST_TIMEOUT = 10

    def __init__(self, urltype):
        """Select the listing URL and set up the crawler's collaborators.

        :param urltype: 0 - domestic high-anonymity proxies; 1 - domestic
            transparent proxies; 2 - domestic HTTPS proxies; 3 - foreign
            high-anonymity proxies
        """
        url_list = {
            0: 'http://www.pcdaili.com/index.php?m=daili&a=free&type=1',
            1: 'http://www.pcdaili.com/index.php?m=daili&a=free&type=2',
            2: 'http://www.pcdaili.com/index.php?m=daili&a=free&type=3',
            3: 'http://www.pcdaili.com/index.php?m=daili&a=free&type=4',
        }
        self.log = LogHandler("db")
        # Always defined: None marks an unsupported ``urltype`` so that
        # run_spider can report it clearly instead of an AttributeError.
        self.url = url_list.get(urltype) if urltype in [0, 1, 2, 3] else None
        self.ua = UserAgent()
        self.sqlite = SqliteClient()
        self.sqlite.create_table_sqlite()

    def run_spider(self, page):
        """Fetch the first ``page`` listing pages and extract the table cells.

        Pages that cannot be downloaded are logged and skipped.

        :param page: number of pages to fetch
        :return: list with one entry per fetched page; each entry is the
            list of row tuples produced by ``group_list``
        :raises ValueError: if the spider was created with an unsupported
            ``urltype``
        """
        if self.url is None:
            raise ValueError("IpSpider was created with an unsupported urltype")

        iplist = []
        for page_no in range(1, page + 1):
            headers = {'Host': 'www.pcdaili.com', "user-agent": self.ua.chrome}
            sp_url = self.url + "&page=%d" % page_no
            try:
                r = requests.get(sp_url, headers=headers,
                                 timeout=self.REQUEST_TIMEOUT)
            except Exception as e:
                # Previously the parsing ran in ``finally`` and used an
                # unbound (or stale, previous-page) response after a failure.
                self.log.error(e)
            else:
                html = etree.HTML(r.text)
                res = html.xpath(
                    '/html/body/div/div/div[2]/table/tbody/tr/td/text()')
                iplist.append(self.group_list(res, 7))
            # Pause between pages.
            time.sleep(1)
        self.log.info("spider html ok")
        return iplist

    def group_list(self, grouped, length):
        """Split a flat list into tuples of ``length`` items.

        :param grouped: flat list
        :param length: size of each group
        :return: at most the first 13 groups, e.g. [(), ()]
        """
        d = [
            tuple(grouped[i:i + length])
            for i in range(0, len(grouped), length)
        ]
        return d[:13]

    def ip_insert_sql(self, ip_list):
        """Validate every scraped proxy and store the working ones.

        Rows with fewer than four cells are logged and skipped.

        :param ip_list: result of ``run_spider``
        :return: True
        """
        for page_rows in ip_list:
            for row in page_rows:
                if len(row) < 4:
                    # The last group of a page can be short.
                    self.log.error("skip malformed proxy row: %r" % (row,))
                    continue
                ip_addr, ip_port, proxy_type = row[0], row[1], row[3]
                ip_proxy = proxy_type + "://" + ip_addr + ":" + ip_port
                if self.validate_ip(type=proxy_type, ip_proxy=ip_proxy):
                    self.sqlite.insert_table_sqlite(ip_addr=ip_addr,
                                                    ip_port=ip_port,
                                                    type=proxy_type,
                                                    ip_proxy=ip_proxy)
        return True

    def validate_ip(self, type, ip_proxy):
        """Check whether https://weibo.com/ can be fetched with the proxy.

        :param type: proxy type, used as the key of the ``proxies`` mapping
        :param ip_proxy: proxy url
        :return: True when the request succeeded, False otherwise
        """
        test_url = "https://weibo.com/"
        # NOTE(review): the mapping key is whatever text the site listed;
        # confirm it matches the scheme of ``test_url``, otherwise the proxy
        # may not actually be exercised.
        proxies = {type: ip_proxy}
        try:
            requests.get(test_url, proxies=proxies,
                         timeout=self.REQUEST_TIMEOUT)
        except Exception as e:
            self.log.error(e)
            return False
        self.log.info(ip_proxy + " is ok !test url is " + test_url)
        return True
def __init__(self, ):
    """Set up the parser, the path of ``config.ini`` and the logger."""
    self.name = "config.ini"
    # The file is looked up directly under ROOT_PATH.
    self.sql_path = os.path.join(ROOT_PATH, self.name)
    self.config = ConfigParser()
    self.log = LogHandler("db")
# -*- coding: utf-8 -*- import requests from lxml import etree from util.LogHandler import LogHandler from util.WebRequest import WebRequest logger = LogHandler(__name__) def robustCrawl(func): def decorate(*args, **kwargs): try: return func(*args, **kwargs) except Exception as e: logger.info(u"sorry,主区出错。原因:") logger.info(e) return decorate def verifyProxyFormat(proxy): import re verify_regex = r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{1,5}' return True if re.findall(verify_regex, proxy) else False def getHtmlTree(url, **kwargs): header = { 'Connection': 'keep-alive', 'Cache-Control': 'max-age=0', 'Upgrade-Insecure-Requests': '1',