def __init__(self):
    """Initialise the HTTP session and paging state for the Jilin endpoint.

    Creates a ``requests`` session with a fixed desktop-Chrome User-Agent,
    obtains a proxy via ``ABY()``, attaches a logger from ``get_log()``
    and records the target URL plus the paging counters.
    """
    # The original ``super(REQ).__init__()`` built an *unbound* super object
    # and re-initialised that proxy, so the parent initializer never ran.
    # The two-argument form actually dispatches to the parent class.
    # NOTE(review): assumes the enclosing class is ``REQ`` -- confirm.
    super(REQ, self).__init__()

    self.s = requests.session()
    self.s.headers.update({
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36",
    })

    # ABY() yields a pair; presumably (proxy settings, raw proxy record)
    # -- verify against the helper's definition.
    self.proxy, self.ipItem = ABY()
    self.IPcount = 0  # presumably counts uses of the current proxy -- TODO confirm
    self.log = get_log()

    self.baseUrl = 'http://cx.jlsjsxxw.com/UserInfo/CertifiedEngineers.aspx'
    self.page = 1  # initial page index
    self.pagecount = 25862  # hard-coded; presumably the total number of pages
def __init__(self):
    """Initialise the HTTP session and paging state for the Qinghai endpoint.

    Creates a ``requests`` session with a fixed desktop-Chrome User-Agent
    and a hard-coded Cookie header, obtains a proxy via ``ABY()``, attaches
    a logger from ``get_log()`` and records the target URL plus the paging
    counters.
    """
    # The original ``super(REQ).__init__()`` built an *unbound* super object
    # and re-initialised that proxy, so the parent initializer never ran.
    # The two-argument form actually dispatches to the parent class.
    # NOTE(review): assumes the enclosing class is ``REQ`` -- confirm.
    super(REQ, self).__init__()

    self.s = requests.session()
    # NOTE(review): the Cookie below is a literal captured value (it embeds
    # a ``token`` field); it presumably expires and would then need to be
    # refreshed by hand -- confirm with whoever operates this job.
    self.s.headers.update({
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36",
        "Cookie": "UM_distinctid=1732868fb4344b-009cfeb9d14544-3a65420e-1fa400-1732868fb443ea; Hm_lvt_03b8714a30a2e110b8a13db120eb6774=1594110837,1594366937,1594602475; regionId=510000; token=3f200f0b24c74429b118d0621bbb28d3; acw_tc=2760825e15946082441805991e19acfd3194ae4680a04bfe334f8ce2aa98eb; CNZZDATA1275173796=34275469-1594106682-%7C1594607048; Hm_lpvt_03b8714a30a2e110b8a13db120eb6774=1594609014",
    })

    # ABY() yields a pair; presumably (proxy settings, raw proxy record)
    # -- verify against the helper's definition.
    self.proxy, self.ipItem = ABY()
    self.IPcount = 0  # presumably counts uses of the current proxy -- TODO confirm
    self.log = get_log()

    self.baseUrl = 'http://139.170.150.135/dataservice/query/staff/list'
    # Starts at 9100 rather than 1; presumably resuming an interrupted run.
    self.page = 9100
    self.pagecount = 16888  # hard-coded; presumably the total number of pages
def __init__(self):
    """Initialise the HTTP session and paging state for the Hubei endpoint.

    The original author's note marks this source as slow and to be fetched
    last.

    Creates a ``requests`` session with a fixed desktop-Chrome User-Agent,
    obtains a proxy via ``ABY()``, attaches a logger from ``get_log()``
    and records the target URL plus the paging counters.
    """
    # The original ``super(REQ).__init__()`` built an *unbound* super object
    # and re-initialised that proxy, so the parent initializer never ran.
    # The two-argument form actually dispatches to the parent class.
    # NOTE(review): assumes the enclosing class is ``REQ`` -- confirm.
    super(REQ, self).__init__()

    self.s = requests.session()
    self.s.headers.update({
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36",
    })

    # ABY() yields a pair; presumably (proxy settings, raw proxy record)
    # -- verify against the helper's definition.
    self.proxy, self.ipItem = ABY()
    self.IPcount = 0  # presumably counts uses of the current proxy -- TODO confirm
    self.log = get_log()

    self.baseUrl = 'http://jg.hbcic.net.cn/web/RyManage/RySearch.aspx?rylx=snry'
    self.page = 1  # initial page index
    self.pagecount = 9624  # hard-coded; presumably the total number of pages
def __init__(self):
    """Initialise the HTTP session and paging state for the Inner Mongolia endpoint.

    Creates a ``requests`` session with a fixed desktop-Chrome User-Agent,
    obtains a proxy via ``ABY()``, attaches a logger from ``get_log()``
    and records the target URL plus the paging counters.
    """
    # The original ``super(REQ).__init__()`` built an *unbound* super object
    # and re-initialised that proxy, so the parent initializer never ran.
    # The two-argument form actually dispatches to the parent class.
    # NOTE(review): assumes the enclosing class is ``REQ`` -- confirm.
    super(REQ, self).__init__()

    self.s = requests.session()
    self.s.headers.update({
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36",
    })

    # ABY() yields a pair; presumably (proxy settings, raw proxy record)
    # -- verify against the helper's definition.
    self.proxy, self.ipItem = ABY()
    self.IPcount = 0  # presumably counts uses of the current proxy -- TODO confirm
    self.log = get_log()

    self.baseUrl = 'http://110.16.70.26/nmjgpublisher/handle/ProjectsInfoHandler.ashx'
    self.page = 1  # initial page index
    self.pagecount = 273  # hard-coded; presumably the total number of pages