def __init__(self):
    # Start URL
    self.starturl = 'http://chengdu.atobo.com/'
    # Template for one data record
    self.data_demo = {
        '_id': '',
        'category_name': '',
        'company_name': '',
        'company_phone': '',
        'company_address': ''
    }
    # Wrapped request helper with a built-in proxy IP pool
    self.f = FETCH()
    self.m = MongoDB('mongodb://localhost', 'cuiworkdb', 'BMD_atb_chengdu')
    self.r0 = Redisclient(0)
    self.r1 = Redisclient(1)
    self.r2 = Redisclient(2)
    self.r3 = Redisclient(3)
    self.category_list = []
def __init__(self):
    self.start_url = 'https://b2b.11467.com/'
    # Raw header block copied straight from the browser's devtools; it has
    # to be parsed into a dict before requests can use it (see the helper
    # sketch below)
    self.headers = b"""Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9
Accept-Language: zh-CN,zh;q=0.9
Cache-Control: no-cache
Connection: keep-alive
Cookie: Hm_lvt_819e30d55b0d1cf6f2c4563aa3c36208=1616553403,1617870200; Hm_lpvt_819e30d55b0d1cf6f2c4563aa3c36208=1617870504; arp_scroll_position=400
Host: b2b.11467.com
Pragma: no-cache
Referer: https://www.11467.com/
sec-ch-ua: "Google Chrome";v="89", "Chromium";v="89", ";Not A Brand";v="99"
sec-ch-ua-mobile: ?0
Sec-Fetch-Dest: document
Sec-Fetch-Mode: navigate
Sec-Fetch-Site: same-site
Sec-Fetch-User: ?1
Upgrade-Insecure-Requests: 1
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36"""
    self.f = FETCH()
    self.m = MongoDB('mongodb://localhost', 'cuiworkdb', "shunqiwang")
    self.r2 = Redisclient(2)
    self.r3 = Redisclient(3)
    self.area_name_list = []
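# A minimal helper sketch (not part of the original code): self.headers
# above is a raw bytes block, which requests cannot consume directly. This
# hypothetical parse_raw_headers() turns that block into the dict form
# requests expects.
def parse_raw_headers(raw: bytes) -> dict:
    headers = {}
    for line in raw.decode('utf-8').splitlines():
        if ':' in line:
            key, _, value = line.partition(':')
            headers[key.strip()] = value.strip()
    return headers

# Usage, assuming the __init__ above has run:
# headers_dict = parse_raw_headers(self.headers)
# requests.get(self.start_url, headers=headers_dict)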
def data_spdier(collection_name, periods, category, total_page, pic_url_list):
    """
    :param collection_name: name of the local Mongo collection to write into (str)
    :param periods: announcement issue number (str)
    :param category: announcement type option value, e.g. delivery or invalidation announcements (str)
    :param total_page: total number of result pages to scrape (int)
    :param pic_url_list: announcement image URLs collected beforehand, one per pair of rows (list)
    :return: nothing; inspect the results in the local cuiworkdb collection `collection_name`
    """
    db = MongoDB('mongodb://localhost', 'cuiworkdb', collection_name)
    driver = webdriver.Chrome()
    driver.implicitly_wait(10)
    driver.get(
        'http://wsgg.sbj.cnipa.gov.cn:9080/tmann/annInfoView/annSearch.html?annNum='
    )
    driver.find_element_by_xpath('//input[@id="annNum"]').send_keys(periods)
    # Open the announcement-type dropdown
    sleep(0.2)
    driver.find_element_by_xpath('//select[@id="annTypes"]').click()
    # Select the requested announcement type,
    # e.g. '//select[@id="annTypes"]//option[@value="TMSDGG"]'
    driver.find_element_by_xpath(
        '//select[@id="annTypes"]//option[@value="{}"]'.format(
            category)).click()
    # Click the search button
    driver.find_element_by_xpath('//input[@name="annSearchBut"]').click()
    sleep(2)
    # The loop below scrapes the result table; every page holds 20 rows.
    # Registration number: //table//tr[@class="evenBj"]/td[5]
    # Applicant:           //table//tr[@class="evenBj"]/td[6]
    # Trademark name:      //table//tr[@class="evenBj"]/td[7]
    # View link:           //table//tr[@class="evenBj"]/td[8]
    count = 0
    page = 1
    number = 0
    while True:
        # Stop after the last page
        if page == total_page + 1:
            break
        sleep(1)
        register_num = driver.find_elements_by_xpath(
            '//table//tr[@class="evenBj"]/td[5]')
        company_name = driver.find_elements_by_xpath(
            '//table//tr[@class="evenBj"]/td[6]')
        brand_name = driver.find_elements_by_xpath(
            '//table//tr[@class="evenBj"]/td[7]')
        for i in range(len(register_num)):
            item = {}
            item['_id'] = count + 1
            item['num'] = periods
            # Store the requested category rather than the original
            # hard-coded '送达公告'
            item['category'] = category
            item['type'] = ''
            item['register_num'] = register_num[i].text
            try:
                item['company_name'] = company_name[i].text
            except:
                item['company_name'] = ''
            try:
                item['brand_name'] = brand_name[i].text
            except:
                item['brand_name'] = ''
            # Two consecutive rows share one announcement image
            if i % 2 == 0:
                item['pic_url'] = pic_url_list[number]
                print(count)  # 0 1 2 3 4 5 6 7
            else:
                item['pic_url'] = pic_url_list[number - 1]
                number -= 1
            print(item)
            db.mongo_add(item)
            number += 1
            count += 1
        sleep(2)
        # Click "next page"
        driver.find_element_by_xpath(
            '//div[@id="pages"]//td[8]//a//span[2]').click()
        page += 1
        sleep(1)
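# A usage sketch (illustrative values, not from the original code): scrape
# ten pages of delivery announcements (option value "TMSDGG") from issue
# 1677 into a local collection. pic_url_list is assumed to come from the
# image-page crawler elsewhere in this project.
# pic_url_list = [...]  # collected beforehand
# data_spdier('Shangbiao_GG-1677', '1677', 'TMSDGG', 10, pic_url_list)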
# 2021/1/11: crawl the trademark announcement text info and collect the image page numbers
import requests
import json
from TOOLS.mongosave import MongoDB
from TOOLS.md5encode import md5encryption

# Update the target collection here
db = MongoDB('mongodb://localhost', 'cuiworkdb', "Shangbiao_GG-1731")

from Func.fetchJX import FETCH
s = FETCH()

# Update the URL here
url = 'http://wsgg.sbj.cnipa.gov.cn:9080/tmann/annInfoView/annSearchDG.html?O56fzBVE=5kGuYaPrUkHH9Lq8YrTHefgypngRP23L4qgXUWGQn1TKi8Yd5igEworl0xfbe_QQgAs_cOt3plSW7uuYJrq7L1WIUpjbSV_Y8jwT7pDt3qBvN3dxHlaivZlvTyYxD3JctgtaJru5MJhZZxGydeS.3ZoIfni9CZxyKko2tQuVGHLbUbVBWout9qOnP1i6mGCnxEGiUea_nSP_3xljf3U6zkgZ.c5DKXAuQiGzZjKcCLOKPsuFP3CgjXwhbt5ESmD3jfvCNBjc.Mtyy4_D_bfDngudJ.DvhsEJGWicOi6eI9.5BhuIoL5WfOPkkmcebfPQXvuh0SBxzitoPdczDRmEvxbzY2c5irpolrybljU4ZbUVkv0X8Dz5Kv38UUvuDfGXI'

# Update the Cookie here
headers = {
    "Accept": "application/json, text/javascript, */*; q=0.01",
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Cache-Control": "no-cache",
    "Connection": "keep-alive",
    "Content-Length": "283",
    "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
    "Cookie": "_gscu_1645064636=76464150vsisqf48; _gscu_2023327167=76464150m8szyi25; _trs_uv=k9wi5ba1_4030_8pj2; Hm_lvt_d7682ab43891c68a00de46e9ce5b76aa=1597140998; UM_distinctid=174ae765669480-09ef6ad0f222cb-4353761-1fa400-174ae76566aa07; goN9uW4i0iKzS=5db92.A0J2CMY23basgx2TZ.mTIJ7lkLr89FeTJ1C0aRMHE_2AokqW2_4RJ42AQplsUcWhHGBKqZ3JYJcp..cRA; __jsluid_h=b6457e19fe1b05edea1f19ada75c9f46; tmas_cookie=2272.7688.15400.0000; 018f9ebcc3834ce269=09b16dacaa2e3b985f2267dc76e5ae8f; JSESSIONID=0000mA5W99E1uXfd1qh0wgqzyqA:1bm112s99; goN9uW4i0iKzT=53cCT8DqzzR9qqqm67L0OCGfXkxa3Eg9kcZgg2BzmN4mJeGvNh.af42XRAU.5pBn6JEBVQW9X7_5Q0c0BLcubFHR3V2NtqqslXLY0Rg.3qvRoSOo.eXYEunrAawqXfJ4OYHTCLen_Z85LNWTB77aJOXfqtOqhlOUMzVD_5wlioEYc22WaLxHAvTwqbtDutolgF8kpTIldeoQJwo89qgNpe0ZOzZwHaaYC3qh7.7bucy3WpAnMVKFV_K_LPWPdL195mAzPq8uiBWY5CRMjmCfU88wyS.H5RFGSvrFx87nTLofgdhXnNMBq1vgUkTx5FYpDxvN5jaQg8eqCoedhokTjYW",
    "Host": "wsgg.sbj.cnipa.gov.cn:9080",
    "Origin": "http://wsgg.sbj.cnipa.gov.cn:9080",
    "Pragma": "no-cache",
    "Referer": "http://wsgg.sbj.cnipa.gov.cn:9080/tmann/annInfoView/annSearch.html",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36",
    "X-Requested-With": "XMLHttpRequest",
}
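# A minimal request sketch (assumed, not from the original script): the
# headers above describe an XHR form POST, so a plain requests call would
# look like the following. `payload` is a placeholder; the real form
# fields are not shown in this snippet.
# payload = {...}
# resp = requests.post(url, headers=headers, data=payload)
# result = resp.json()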
import requests
from lxml import etree
from TOOLS.mongosave import MongoDB
from TOOLS.md5encode import md5encryption
from Func.fetchJX import FETCH
# Redisclient and is_phone are project-local helpers; their import paths
# are not shown in the original snippet.


class Spider9():
    def __init__(self):
        self.start_url = 'http://www.9gk.cc/zp/sichuan/'
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 2.0.50727; SLCC2; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.3; .NET4.0C; Tablet PC 2.0; .NET4.0E)'
        }
        self.headers_fordata = {
            # ":authority": "www.cbi360.net",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
            "Accept-Encoding": "gzip,deflate,br",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Cache-Control": "no-cache",
            "Content-Type": "application/x-www-form-urlencoded;charset=UTF-8",
            "Cookie": "Hm_lvt_ccf8b732d64d55d0d8a73ec2bcd276ab=1612144130,1612399856,1612752316,1613704044; Hm_lpvt_ccf8b732d64d55d0d8a73ec2bcd276ab=1613704100",
            "Connection": "keep-alive",
            "Host": "www.9gk.cc",
            "pragma": "no-cache",
            "Referer": "http://www.9gk.cc/zp/p1700",
            "Upgrade-Insecure-Requests": "1",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36",
        }
        self.r0 = Redisclient(0)
        self.r1 = Redisclient(1)
        self.r2 = Redisclient(2)
        self.f = FETCH()
        self.m = MongoDB('mongodb://localhost', 'cuiworkdb', "9guakao_chengdu")

    def get_category_url(self):
        # Category listing pages run from p1700 to p1713
        for i in range(14):
            url = self.start_url + "p" + str(1700 + i)
            self.r0.save_page_url("上海", url)

    def get_all_page(self):
        while True:
            try:
                url = self.r0.get_page_url("上海")
            except:
                break
            if isinstance(url, bytes):  # Redis returns bytes
                url = url.decode('utf-8')
            # Page 1 goes straight to the queue; deeper pages follow the
            # /page/{n} pattern
            self.r1.save_page_url("上海", url)
            try:
                html = requests.get(url=url, headers=self.headers)
            except:
                break
            print(html.text)
            res = etree.HTML(html.text)
            try:
                last_page = res.xpath(
                    '//ul[@class="pagination"]//li[@class="disable"]//following-sibling::li//a/text()'
                )
                if last_page == []:
                    # Fall back to the last pagination link
                    last_page = [
                        res.xpath('//ul[@class="pagination"]//li//a/text()')[-1]
                    ]
            except Exception as e:
                print(e)
                break
            for i in range(2, int(last_page[0]) + 1):
                page_url = url + r'/page/{}'.format(i)
                self.r1.save_page_url("上海", page_url)

    def parse_item_url(self):
        # Target links: //div[@class="col-xs-12 boxshadow"]//div[@class="col-lg-12 bk-btm-xuxian pad-10"]//div[@class="col-lg-5 pad-left20"]//a/@href
        while True:
            try:
                url = self.r1.get_page_url("上海")
                html = requests.get(url=url, headers=self.headers)
            except Exception as e:
                break
            # print(html.text)
            res = etree.HTML(html.text)
            # item_url_list = res.xpath('//div[@class="col-xs-12 boxshadow "]//div[@class="col-lg-12 bk-btm-xuxian pad-10"]//div[@class="col-lg-5 pad-left20"]//a/@href')
            item_url_list = res.xpath(
                '/html/body/div[5]/div/div/div/span/a/@href')
            for i in range(len(item_url_list)):
                print(item_url_list[i])
                self.r2.save_page_url("上海", item_url_list[i])

    def parse_data(self):
        while True:
            try:
                url = self.r2.get_page_url("上海")
                print(url)
            except:
                break
            if isinstance(url, bytes):  # Redis returns bytes
                url = url.decode('utf-8')
            headers = self.headers_fordata
            headers["Referer"] = url
            html = requests.get(url=url, headers=headers)
            res = etree.HTML(html.text)
            try:
                outName = res.xpath(
                    '/html/body/div[3]/div[1]/div[2]/div[4]/text()')[0]
                phone = res.xpath(
                    '/html/body/div[3]/div[1]/div[2]/div[6]/span/text()')[0]
                companyName = res.xpath(
                    '/html/body/div[3]/div[1]/div[1]/h2/text()')[0]
            except:
                continue
            if is_phone(phone):
                # Skip companies whose name contains "企业管理"
                if "企业管理" not in str(companyName):
                    print(companyName)
                    item = {}
                    item['companyCity'] = "成都"
                    item['companyProvince'] = "四川省"
                    item['code'] = 'BUS_YT_ZZ'
                    item['name'] = '资质'
                    item['busCode'] = ''
                    item['webUrl'] = '无'
                    item['orgId'] = ''
                    item['deptId'] = ''
                    item['centreId'] = ''
                    item["companyName"] = companyName
                    item["outName"] = outName
item["resourceRemark"] = '' item["companyTel"] = str(phone) item["ibossNum"] = None item['isDir'] = 0 item['isShare'] = 0 item["_id"] = md5encryption(item["companyTel"]) # item["flag"] = 0 print(item) self.m.mongo_add(item) else: continue def run(self): self.get_category_url() self.get_all_page() self.parse_item_url() self.parse_data() def test(self): pass
import base64
import os
import requests
import pytesseract
from PIL import Image
from time import sleep
from lxml import etree
from TOOLS.mongosave import MongoDB
from TOOLS.md5encode import md5encryption
from Func.fetchJX import FETCH
# Redisclient is a project-local helper; its import path is not shown in
# the original snippet.


class Spider98_zhaoping:
    def __init__(self):
        self.start_url = 'http://www.98pz.com/t59c11s1/1.html'
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 2.0.50727; SLCC2; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.3; .NET4.0C; Tablet PC 2.0; .NET4.0E)'
        }
        self.r0 = Redisclient(0)
        self.r1 = Redisclient(1)
        self.f = FETCH()
        self.m = MongoDB('mongodb://localhost', 'cuiworkdb', "98guakao_hz_qz")

    def parse_next_page(self):
        # Seed the queue with the first listing page, then follow the
        # "next page" link until it disappears
        self.r0.save_page_url(category_name='杭州求职', page_url=self.start_url)
        html = requests.get(url=self.start_url, headers=self.headers)
        sleep(0.5)
        while True:
            res = etree.HTML(html.text)
            try:
                next_page_url = res.xpath(
                    '//div[@class="pager"]//a[@class="next"]/@href')[0]
            except:
                break
            next_page_url = 'http://www.98pz.com/' + next_page_url
            print(next_page_url)
            self.r0.save_page_url(category_name='杭州求职',
                                  page_url=next_page_url)
            html = requests.get(url=next_page_url, headers=self.headers)

    def parse_item_url(self):
        while True:
            url = self.r0.get_page_url(category_name='杭州求职')
            try:
                html = requests.get(url=url, headers=self.headers)
                sleep(0.5)
            except:
                break
            res = etree.HTML(html.text)
            # The last link in the column is pagination, so drop it
            item_url_list = res.xpath('//td[@class="t"]//a[1]')[:-1]
            for one in item_url_list:
                url = one.xpath('./@href')[0]
                self.r1.save_item_url(category_name='杭州求职', url=url)

    def parse_data(self):
        while True:
            item = {}
            url = self.r1.get_item_url(category_name='杭州求职')
            if not url:  # queue drained
                break
            if isinstance(url, bytes):  # Redis returns bytes
                url = url.decode('utf-8')
            if 'www' not in url:
                url = 'http://www.98pz.com' + url
            try:
                html = requests.get(url=url, headers=self.headers)
                sleep(0.5)
            except Exception as e:
                print(e)
                continue
            res = etree.HTML(html.text)
            try:
                company_name = res.xpath(
                    '//span[@class="firm-name"]/a/@title')[0]
            except:
                continue
            # try:
            #     info = res.xpath('//li/i[contains(text(),"注册情况:")]/following-sibling::*/text()')[0]
            #     print(info)
            # except:
            #     continue
            contact_people = res.xpath(
                '//li/i[contains(text(),"联 系 人:")]/following-sibling::*/text()'
            )[0]
            print(contact_people)
            # The phone number is rendered as an inline image; try the
            # landline field first, then the mobile field
            try:
                phone_url = res.xpath(
                    '//li/i[contains(text(),"固定电话:")]/following-sibling::*//img/@src'
                )[0]
            except:
                try:
                    phone_url = res.xpath(
                        '//li/i[contains(text(),"手机号码:")]/following-sibling::*//img/@src'
                    )[0]
                except:
                    continue
            resourceMark = res.xpath(
                '//li/i[contains(text(),"职位类型:")]/following-sibling::a//text()'
            )
            resourceMark = resourceMark[0] + resourceMark[1]
            if phone_url == '':
                phone = ''
            else:
                try:
                    phone = self.rec_img(phone_url)
                except:
                    continue
            item['companyCity'] = '杭州'
            item['companyProvince'] = '浙江省'
            item['code'] = 'BUS_YT_ZZ'
            item['name'] = '资质'
            item['busCode'] = ''
            item['webUrl'] = '无'
            item['orgId'] = ''
            item['deptId'] = ''
            item['centreId'] = ''
            item["companyName"] = company_name
            item["outName"] = contact_people
            item["resourceRemark"] = resourceMark
            item["companyTel"] = phone
            item["ibossNum"] = None
            item['isDir'] = 0
            item['isShare'] = 0
            # Dedupe key: md5 of the phone number
            item["_id"] = md5encryption(item["companyTel"])
            print(item)
            self.m.mongo_add(item)

    def rec_img(self, img_url):
        # The "image URL" is an inline base64-encoded GIF; decode it to a
        # temp file and OCR it with Tesseract
        url_b = img_url.split('data:image/gif;base64,')[1]
        url_b = url_b.encode()
        content = base64.b64decode(url_b)
        with open(r'G:\rec_pic\target.jpg', 'wb') as f:
            f.write(content)
        text = pytesseract.image_to_string(
            Image.open(r'G:\rec_pic\target.jpg').convert('RGB'))
        os.remove(r'G:\rec_pic\target.jpg')
        return text

    def test(self):
        self.parse_item_url()

    def run(self):
        self.parse_next_page()
        self.parse_item_url()
        self.parse_data()
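# A run sketch (assumed entry point; not part of the original snippet).
# rec_img() needs the Tesseract binary installed and the G:\rec_pic folder
# to exist before the OCR step can succeed.
if __name__ == '__main__':
    Spider98_zhaoping().run()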
import re
# The original code calls .get()/.extract() on XPath results, which is the
# parsel/Scrapy Selector API rather than lxml's, so a parsel Selector is
# assumed here in place of etree.HTML.
from parsel import Selector
from Func.fetchJX import FETCH
from TOOLS.mongosave import MongoDB
# find_address, get_phone and phone_encrypte are project-local helpers;
# their import paths are not shown in the original snippet.


class Atb_spider():
    def __init__(self):
        self.starturl = 'http://bj.atobo.com/'
        # Template for one data record
        self.data_demo = {
            '_id': '',
            'category_name': '',
            'company_name': '',
            'company_phone': '',
            'company_address': ''
        }
        self.f = FETCH()
        self.m = MongoDB(
            'mongodb://10.2.1.121:17017,10.2.1.122:17017,10.2.1.123:17017',
            'clues_resources', 'ZZ_20201214_rollback')

    def get_html(self, url):
        html = self.f.fetch(url)
        return html

    def parse_category_html(self, html):
        # Parse the category page for each category's name and URL
        response = Selector(text=html)
        category_list = response.xpath(
            '//div[@class="sidebar-category"]/ul//li/p[@class="pcategory_son"]/a'
        )
        for category in category_list:
            category_name = category.xpath('./text()').get()
            category_url = category.xpath('./@href').get()
            category_url = 'http:' + category_url
            # yield category_url
            yield self.parse_more_html(self.get_html(category_url),
                                       category_name=category_name)

    def parse_more_html(self, html, category_name):
        # Get the URL behind the "more companies" link
        response = Selector(text=html)
        more_company_url = response.xpath(
            '//div[@class="product-list-more"]/a/@href').get()
        yield self.parse_allpage(self.get_html(more_company_url),
                                 category_name=category_name)

    def parse_allpage(self, html, category_name):
        # Walk every pagination URL: parse_one_url yields each company URL
        # on the current page; once a page is done, the code below resolves
        # the next page and recurses.
        # (yielded rather than a bare call, so the drain helper in
        # runspider actually consumes it)
        yield self.parse_one_url(html, category_name=category_name)
        response = Selector(text=html)
        one_page_url = response.xpath(
            '//a[contains(text(),"下一页")]/@href').extract()
        if one_page_url:
            one_page_url = 'http://www.atobo.com' + one_page_url[0]
            if one_page_url != 'http://www.atobo.com#':
                yield self.parse_allpage(self.get_html(one_page_url),
                                         category_name=category_name)

    def parse_one_url(self, html, category_name):
        # Get the list of company URLs on one page
        response = Selector(text=html)
        one_url_list = response.xpath(
            '//li[@class="product_box"]//li[@class="pp_name"]//a[@class="CompanyName"]/@href'
        ).extract()
        for one_url in one_url_list:
            one_url = "http://www.atobo.com/" + one_url
            yield self.parse_data(self.get_html(one_url),
                                  category_name=category_name)

    def parse_data(self, html, category_name):
        # Extract the final record
        response = Selector(text=html)
        try:
            # Image-based page layout
            company_name = response.xpath(
                '//div[@class="company-intro"]//tr[1]/td/text()').get()
            company_address = response.xpath(
                '//div[@class="company-intro"]//table//tr[2]').get()
            company_address = find_address(company_address)
            if not company_name:
                # Non-image layout
                company_name = response.xpath(
                    '//div[@class="company-banner"]//p[@class="t-companyname"]/text()'
                ).get()
                company_address = response.xpath(
                    '//div[@class="card-context"]//ul[2]//li[2]').get()
                company_address = find_address(company_address)
        except Exception:
            company_name = None
            company_address = None
        try:
            # Case where the phone number is rendered as an image
            company_phone = response.xpath(
                '//div[@class="company-intro"]//tr/td/img/@src').extract()[1]
            company_phone = 'http:' + company_phone
            phone2 = get_phone(picurl=company_phone)
            phone = ''.join(phone2)
            if not company_phone:
                phone1 = response.xpath('//ul[4]//li[2]').get()
                pattern = re.compile(r"\d+", re.S)
                phone = pattern.findall(phone1)
                if not phone:
                    phone3 = response.xpath(
                        '//div[@class="company-intro"]//tr/td/img/@src'
                    ).extract()[0]
                    try:
                        phone4 = 'http:' + phone3
                        phone5 = get_phone(picurl=phone4)
                        phone = ''.join(phone5)
                    except:
                        phone4 = 'http:' + phone3
                        phone5 = get_phone(picurl=phone4)
                        phone = phone5[0]
        except Exception:
            phone = None
        if company_name and phone:
            # Save to Mongo, keyed by the encrypted phone number
            id = phone_encrypte(phone)
            data = self.data_demo
            data["_id"] = id
            # Keys fixed to match the data_demo template above
            data["category_name"] = category_name
            data["company_name"] = company_name
            data["company_address"] = company_address
            data["company_phone"] = phone
            self.m.save_data(datadict=data)
        yield

    def runspider(self):
        html = self.get_html(self.starturl)
        # The parse_* methods are generators that yield nested generators,
        # so they must be drained recursively or nothing actually runs
        def drain(gen):
            for sub in gen:
                if hasattr(sub, 'send'):  # only recurse into generators
                    drain(sub)
        drain(self.parse_category_html(html))
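# A run sketch (assumed entry point, not in the original snippet):
if __name__ == '__main__':
    Atb_spider().runspider()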
# Generic whitelist API
# For the daily BMD task, update the date (Mongo collection name and file location)
from TOOLS.mongosave import MongoDB

db = MongoDB('mongodb://localhost', 'cuiworkdb', 'BMD20210205-chengdu')

# Business-type name -> BUS_YT code
Format_code = {
    "项目": "BUS_YT_XM",
    "创业发展": "BUS_YT_CY",
    "资质": "BUS_YT_ZZ",
    "互联网": "BUS_YT_HLW",
    "财税": "BUS_YT_CS",
    "会计": "BUS_YT_KJ",
    "金融发展": "BUS_YT_JRFZ",
    "法律": "BUS_YT_FL",
    "培训(大卓商学院)": "BUS_YT_DZSXY",
    "综合": "BUS_YT_ZH",
    "项目(知产)": "BUS_YT_ZSCQ",
    "品牌": "BUS_YT_PP",
    "人事外包": "BUS_YT_RSWB",
    "装饰": "BUS_YT_ZS",
    "融资": "BUS_YT_DK",
    "商标版权": "BUS_YT_SBBQ",
    "专利项目": "BUS_YT_ZLXM",
    "巨方地产(禁用)": "BUS_YT_JFDC",
    "认证": "BUS_YT_TXRZ",
    "创新": "BUS_YT_CX",
    "网点交易": "BUS_YT_WDJY"
}

city = {
# Images are downloaded to the "Downloads" folder and renamed in numeric order
from selenium import webdriver
from time import sleep
from selenium.webdriver import ActionChains
from TOOLS.mongosave import MongoDB
import os
from pykeyboard import *
from pymouse import *

# Fixed setup code
k = PyKeyboard()
m = PyMouse()
db = MongoDB('mongodb://localhost', 'cuiworkdb', 'Shangbiao_GG')
driver = webdriver.Chrome()
driver.maximize_window()
driver.implicitly_wait(10)
driver.get(
    'http://wsgg.sbj.cnipa.gov.cn:9080/tmann/annInfoView/annSearch.html?annNum='
)

num = '1677'  # Issue number to search for (changeable)
driver.find_element_by_xpath('//input[@id="annNum"]').send_keys(num)
# Click the announcement type