def __init__(self, fileName):
    """Set up Baidu Map endpoints, output paths and the list of names to query.

    Prints the generated user-agent string as a side effect and reads
    ``name.dat`` from the working directory returned by
    ``createNewDir.createDir``.

    Args:
        fileName: Stored unchanged on ``self.fileName``.

    Raises:
        OSError: If ``name.dat`` cannot be opened.
    """
    self.url = 'https://map.baidu.com/'
    # Boundary lookup.
    self.geoUrl = 'https://map.baidu.com/?newmap=1&reqflag=pcmap&biz=1&from=webmap&da_par=direct&pcevaname=pc4.1&qt=ext&c=233&ext_ver=new&tn=B_NORMAL_MAP&nn=0&ie=utf-8&l=17&uid=3c753948c9a2427235b06c64'
    # POI search.
    self.searchUrl = 'http://api.map.baidu.com/?qt=s&c=131&rn=100&ie=utf-8&oue=1&res=api&wd='
    # Sample requests kept for reference:
    #   https://map.baidu.com/?qt=ext&uid=da961f7ddadb6962deee8bb8&ext_ver=new&&nn=0&l=18
    #   search by name -> id:
    #   https://map.baidu.com/su?wd=%E8%9E%8D%E4%BE%A8%20&cid=233&type=0&newmap=1&b=(12121496.865%2C4038346.77%3B12121862.365%2C4038655.77)&t=1539339239819&pc_ver=2

    # Boundary query, step 1: search for candidate names and their uuids.
    self.wdToUuidUrl = 'https://map.baidu.com/su?wd=%E8%9E%8D%E4%BE%A8%20&cid=233&type=0&newmap=1&b=(12121496.865%2C4038346.77%3B12121862.365%2C4038655.77)&t=1539339239819&pc_ver=2'
    # Boundary query, step 2: check whether a polygon exists for the uuid.
    self.getGeoUrl = 'https://map.baidu.com/?newmap=1&reqflag=pcmap&biz=1&from=webmap&da_par=direct&pcevaname=pc4.1&qt=ext&c=233&ext_ver=new&tn=B_NORMAL_MAP&nn=0&ie=utf-8&l=17&uid=3c753948c9a2427235b06c64'

    self.fileName = fileName
    # Collected POI records.
    self.pois = []

    # Random 'User-Agent' generator and the argument string built from it.
    self.ua = UserAgent()
    self.userAnent = 'user-agent="{}"'.format(self.ua.random)
    print(self.userAnent)

    # Keep only the bound conversion method of the coordinate helper.
    self.bd09miTowgs84 = coordinateTranslate.GPS().convert_BD09MI_to_WGS84

    # Working directory and the files kept inside it.
    # NOTE(review): createDir presumably creates the directory and returns
    # its path as a str -- confirm.
    self.path = createNewDir.createDir(r'./tab/baidu_map/')
    self.currFile = self.path + '/curr.dat'
    self.nameFile = self.path + '/name.dat'
    self.poisFile = self.path + '/baidu_poi.csv'

    # Load the names to look up, one per line, from the GBK-encoded file.
    with open(self.nameFile, encoding='gbk') as nameSource:
        self.nameList = [row.strip('\n') for row in nameSource]
def __init__(self, fileName):
    """Set up Baidu Map endpoints, output paths and the list of names to query.

    Prints the generated user-agent string as a side effect and reads
    ``name.csv`` from the working directory returned by
    ``createNewDir.createDir``.

    Args:
        fileName: Stored unchanged on ``self.fileName``.

    Raises:
        OSError: If ``name.csv`` cannot be opened.
    """
    self.url = 'https://map.baidu.com/'
    # Boundary lookup.
    self.geoUrl = 'https://map.baidu.com/?newmap=1&reqflag=pcmap&biz=1&from=webmap&da_par=direct&pcevaname=pc4.1&qt=ext&c=233&ext_ver=new&tn=B_NORMAL_MAP&nn=0&ie=utf-8&l=17&uid=3c753948c9a2427235b06c64'
    # POI search.
    self.searchUrl = 'http://api.map.baidu.com/?qt=s&c=131&rn=100&ie=utf-8&oue=1&res=api&wd='
    # Sample requests kept for reference:
    #   https://map.baidu.com/?qt=ext&uid=da961f7ddadb6962deee8bb8&ext_ver=new&&nn=0&l=18
    #   search by name -> id:
    #   https://map.baidu.com/su?wd=%E8%9E%8D%E4%BE%A8%20&cid=233&type=0&newmap=1&b=(12121496.865%2C4038346.77%3B12121862.365%2C4038655.77)&t=1539339239819&pc_ver=2

    # Boundary query, step 1: search for candidate names and their uuids.
    # The value is a prefix ending in "wd=".
    self.wdToUuidUrl = 'http://map.baidu.com/su?cid=233&type=0&newmap=1&pc_ver=2&wd='
    # Boundary query, step 2: check whether a polygon exists.
    # The value is a prefix ending in "uid=".
    self.getGeoUrl = 'https://map.baidu.com/?newmap=1&reqflag=pcmap&biz=1&from=webmap&da_par=direct&pcevaname=pc4.1&qt=ext&ext_ver=new&tn=B_NORMAL_MAP&nn=0&ie=utf-8&l=17&c=233&uid='

    # URL prefix for fetching POI detail information. Sample full request:
    #   https://map.baidu.com/?ugc_type=3&ugc_ver=1&qt=detailConInfo&device_ratio=1&compat=1&t=1539394773310&uid=827202426e2c6305f33cba83&primaryUid=15840892553483683032&auth=fevy6Dcex31fdLP9AOdODI998f0Z3edJuxHBTBTENxVtComRB199A1GgvPUDZYOYIZuVt1cv3uVtGccZcuVtPWv3GuzEtXzljPaVjyBDEHKOQUWYxcEWe1GD8zv7u%40ZPuVteuztghxehwzJDVD66zJGvpHhOaQD2JKGpt66FUExcc%40AZ
    self.getInfoUrl = 'https://map.baidu.com/?ugc_type=3&ugc_ver=1&qt=detailConInfo&device_ratio=1&compat=1&t=1539394773310&uid='
    # URL prefix for fetching the POI list through the search box.
    self.searchBoxUrl = 'https://map.baidu.com/?newmap=1&reqflag=pcmap&biz=1&from=webmap&da_par=direct&pcevaname=pc4.1&qt=s&da_src=searchBox.button&c=231&src=0&wd2=&pn=0&sug=0&l=16&from=webmap&sug_forward=&device_ratio=1&tn=B_NORMAL_MAP&nn=0&ie=utf-8&wd='

    self.fileName = fileName
    # Collected POI records.
    self.pois = []

    # Random 'User-Agent' generator and the argument string built from it.
    self.ua = UserAgent()
    self.userAnent = 'user-agent="{}"'.format(self.ua.random)
    print(self.userAnent)

    # Keep only the bound conversion method of the coordinate helper.
    self.bd09miTowgs84 = coordinateTranslate.GPS().convert_BD09MI_to_WGS84

    # Working directory and the files kept inside it.
    # NOTE(review): createDir presumably creates the directory and returns
    # its path as a str -- confirm.
    self.path = createNewDir.createDir(r'./tab/baidu_map/')
    self.currFile = self.path + '/curr.dat'
    self.nameFile = self.path + '/name.csv'
    self.poisFile = self.path + '/baidu_poi.csv'

    # Load the names to look up, one per line, from the GBK-encoded file.
    with open(self.nameFile, encoding='gbk') as nameSource:
        self.nameList = [row.strip('\n') for row in nameSource]
def __init__(self):
    """Initialise the Meituan endpoint, user agent and empty crawl state.

    Prints the generated user-agent string as a side effect.
    """
    # URL for querying city codes, and the file the codes are saved to.
    self.cityCodeUrl = 'https://www.meituan.com/ptapi/getprovincecityinfo/'
    self.cityCodeFile = './cityCode.csv'

    # Random 'User-Agent' generator and the argument string built from it.
    self.ua = UserAgent()
    self.userAnent = 'user-agent="{}"'.format(self.ua.random)
    print(self.userAnent)

    # Crawl state; everything starts out empty.
    self.cityList = []      # known cities
    self.city = {}          # city currently being processed
    self.cityAcronym = ""
    self.cateList = []      # categories
    self.areaList = []      # sub-areas
    self.openUrlList = []   # URLs that still need to be collected
    self.PoiInfos = []      # detailed POI records

    # Disabled: path and name of the saved csv file.
    # self.csvFile = createNewDir.createDir(r'../meituan') + self.cityAcronym + '.csv'
    # Disabled: name of the file that stores collection progress.
    # self.currFile = createNewDir.createDir(r'../meituan') + self.cityAcronym + '.dat'

    # Coordinate conversion: keep only the bound method of the helper.
    self.coordTrans = ct.GPS().gcj_decrypt_exact
def __init__(self):
    """Initialise Meituan endpoints, request headers and empty crawl state.

    Prints the generated user-agent string as a side effect.
    """
    # URL for querying city codes, and the file the codes are saved to.
    self.cityCodeUrl = 'https://www.meituan.com/ptapi/getprovincecityinfo/'
    self.cityCodeFile = './cityCode.csv'

    # Random 'User-Agent' generator and the argument string built from it.
    self.ua = UserAgent()
    self.userAnent = 'user-agent="{}"'.format(self.ua.random)
    print(self.userAnent)

    # Crawl state; everything starts out empty.
    self.cityList = []      # known cities
    self.city = {}          # city currently being processed
    self.cityAcronym = ""
    self.cateList = []      # categories
    self.areaList = []      # sub-areas
    self.openUrlList = []   # URLs that still need to be collected
    self.PoiInfos = []      # detailed POI records

    # Disabled: path and name of the saved csv file.
    # self.csvFile = createNewDir.createDir(r'../meituan') + self.cityAcronym + '.csv'
    # Disabled: name of the file that stores collection progress.
    # self.currFile = createNewDir.createDir(r'../meituan') + self.cityAcronym + '.dat'
    # Disabled: file that stores the URL list.
    # self.urlFile = createNewDir.createDir(r'../meituan_url_') + self.cityAcronym + '.csv'

    # Coordinate conversion: keep only the bound method of the helper.
    self.coordTrans = ct.GPS().gcj_decrypt_exact

    # Per-city search URL prefixes, keyed by city name.
    self.citySearchUrl = {
        '西安市': 'https://xa.meituan.com/s/',
        '宝鸡市': 'https://baoji.meituan.com/s/',
        '咸阳市': 'https://xianyang.meituan.com/s/',
        '榆林市': 'https://yl.meituan.com/s/',
        '延安市': 'https://yanan.meituan.com/s/',
        u'汉中市': 'https://hanzhong.meituan.com/s/',
        '铜川市': 'https://tc.meituan.com/s/',
        '商洛市': 'https://sl.meituan.com/s/',
        '渭南市': 'https://wn.meituan.com/s/',
        '安康市': 'https://ankang.meituan.com/s/',
    }

    # Request headers.
    # NOTE(review): "Host" and "Referer" are fixed to yl.meituan.com while
    # citySearchUrl covers several hosts -- confirm callers override them.
    # NOTE(review): the Cookie is a hard-coded browser session; it probably
    # expires and arguably should not live in source -- confirm.
    self.header = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        # One fragment per cookie; the fragments concatenate to one string.
        "Cookie": (
            "__mta=108425917.1551143452201.1551143452201.1551316904069.2; "
            "iuuid=EE5893B5D2C219A684B1BA25271AB6F834A1DC9634EADB340BFDB6BDA984E46F; "
            "_lxsdk_cuid=1653bf57b9e42-064eff1fa336df-252b1971-100200-1653bf57b9f61; "
            "_lxsdk=EE5893B5D2C219A684B1BA25271AB6F834A1DC9634EADB340BFDB6BDA984E46F; "
            "_hc.v=7ef28d0d-ed51-ad2f-1b7f-662935ef4790.1541148357; "
            "webp=1; "
            "cityname=%E8%A5%BF%E5%AE%89; "
            "latlng=34.227458,108.882816,1550806592400; "
            "__utmz=74597006.1550806593.4.4.utmcsr=meishi.meituan.com|utmccn=(referral)|utmcmd=referral|utmcct=/i/poi/; "
            "i_extend=C_b1Gimthomepagecategory11H__a; "
            "_lx_utm=utm_source%3Dmeishi.meituan.com%26utm_medium%3Dreferral%26utm_content%3D%252Fi%252Fpoi%252F; "
            "__mta=108425917.1551143452201.1551143452201.1551143452201.1; "
            "uuid=95fdf18f9d77473398b1.1551316862.1.0.0; "
            "ci=358; "
            "rvct=358%2C356%2C359%2C819%2C772%2C357%2C355%2C360%2C352%2C354%2C353"
        ),
        "DNT": "1",
        "Host": "yl.meituan.com",
        "Pragma": "no-cache",
        "Referer": "https://yl.meituan.com/s/%E4%BA%8C%E9%A9%AC%E5%B8%88%E5%82%85/",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.162 Safari/537.36",
    }
功能描述: 百度地图 电子边框 """ import time, json, os from fake_useragent import UserAgent from selenium import webdriver from selenium.webdriver import ActionChains from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.common.by import By import coordinateTranslate # 初始化坐标系转化对象 coordTrans = coordinateTranslate.GPS() def miToGPS(lon, lat): coord = coordTrans.convert_BD09MI_to_WGS84(float(lon), float(lat)) return coord def getUserAgent(): ua = UserAgent() # 初始化 随机'User-Agent' 方法 tmpuserAnent = 'user-agent="' + ua.random + '"' return tmpuserAnent def seleniumChromeInit(): # 模拟创建一个浏览器对象,然后可以通过对象去操作浏览器
def __init__(self):
    """Initialise Meituan endpoints, request headers and empty crawl state.

    Prints the generated user-agent string as a side effect.
    """
    # URL for querying city codes, and the file the codes are saved to.
    self.cityCodeUrl = 'https://www.meituan.com/ptapi/getprovincecityinfo/'
    self.cityCodeFile = './cityCode.csv'

    # Random 'User-Agent' generator and the argument string built from it.
    self.ua = UserAgent()
    self.userAnent = 'user-agent="{}"'.format(self.ua.random)
    print(self.userAnent)

    # Crawl state; everything starts out empty.
    self.cityList = []      # known cities
    self.city = {}          # city currently being processed
    self.cityAcronym = ""
    self.cateList = []      # categories
    self.areaList = []      # sub-areas
    self.openUrlList = []   # URLs that still need to be collected
    self.PoiInfos = []      # detailed POI records

    # Disabled: path and name of the saved csv file.
    # self.csvFile = createNewDir.createDir(r'../meituan') + self.cityAcronym + '.csv'
    # Disabled: name of the file that stores collection progress.
    # self.currFile = createNewDir.createDir(r'../meituan') + self.cityAcronym + '.dat'
    # Disabled: file that stores the URL list.
    # self.urlFile = createNewDir.createDir(r'../meituan_url_') + self.cityAcronym + '.csv'

    # Coordinate conversion: keep only the bound method of the helper.
    self.coordTrans = ct.GPS().gcj_decrypt_exact

    # Per-city search URL prefixes, keyed by city name.
    self.citySearchUrl = {
        '西安市': 'https://xa.meituan.com/s/',
        '宝鸡市': 'https://baoji.meituan.com/s/',
        '咸阳市': 'https://xianyang.meituan.com/s/',
        '榆林市': 'https://yl.meituan.com/s/',
        '延安市': 'https://yanan.meituan.com/s/',
        u'汉中市': 'https://hanzhong.meituan.com/s/',
        '铜川市': 'https://tc.meituan.com/s/',
        '商洛市': 'https://sl.meituan.com/s/',
        '渭南市': 'https://wn.meituan.com/s/',
        '安康市': 'https://ankang.meituan.com/s/',
    }

    # Request headers; the Cookie entry is intentionally left disabled.
    self.headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "max-age=0",
        "Connection": "keep-alive",
        # "Cookie": "__mta=146243211.1551452178822.1551452178822.1551452270619.2; uuid=63ead3fae1b34c05957f.1551452154.1.0.0; ci=359; rvct=359; _lxsdk_cuid=16939c187b4c8-0f2c4d416da6c-42017773-100200-16939c187b460; _lxsdk_s=16939c187b6-557-aef-f46%7C%7C11",
        "Host": "www.meituan.com",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.162 Safari/537.36 Avast/65.0.411.162",
    }

    # NOTE(review): presumably a recursion counter ("digui") -- confirm
    # against the methods that update it.
    self.diguiCount = 0
def __init__(self):
    """Initialise Meituan endpoints, request headers and empty crawl state.

    Prints the generated user-agent string as a side effect.
    """
    # URL for querying city codes, and the file the codes are saved to.
    self.cityCodeUrl = 'https://www.meituan.com/ptapi/getprovincecityinfo/'
    self.cityCodeFile = './cityCode.csv'

    # Random 'User-Agent' generator and the argument string built from it.
    self.ua = UserAgent()
    self.userAnent = 'user-agent="{}"'.format(self.ua.random)
    print(self.userAnent)

    # Crawl state; everything starts out empty.
    self.cityList = []      # known cities
    self.city = {}          # city currently being processed
    self.cityAcronym = ""
    self.cateList = []      # categories
    self.areaList = []      # sub-areas
    self.openUrlList = []   # URLs that still need to be collected
    self.PoiInfos = []      # detailed POI records

    # Disabled: path and name of the saved csv file.
    # self.csvFile = createNewDir.createDir(r'../meituan') + self.cityAcronym + '.csv'
    # Disabled: name of the file that stores collection progress.
    # self.currFile = createNewDir.createDir(r'../meituan') + self.cityAcronym + '.dat'
    # Disabled: file that stores the URL list.
    # self.urlFile = createNewDir.createDir(r'../meituan_url_') + self.cityAcronym + '.csv'

    # Coordinate conversion: keep only the bound method of the helper.
    self.coordTrans = ct.GPS().gcj_decrypt_exact

    # Per-city search URL prefixes, keyed by city name.
    self.citySearchUrl = {
        '西安市': 'https://xa.meituan.com/s/',
        '宝鸡市': 'https://baoji.meituan.com/s/',
        '咸阳市': 'https://xianyang.meituan.com/s/',
        '榆林市': 'https://yl.meituan.com/s/',
        '延安市': 'https://yanan.meituan.com/s/',
        u'汉中市': 'https://hanzhong.meituan.com/s/',
        '铜川市': 'https://tc.meituan.com/s/',
        '商洛市': 'https://sl.meituan.com/s/',
        '渭南市': 'https://wn.meituan.com/s/',
        '安康市': 'https://ankang.meituan.com/s/',
    }

    # Request headers.
    # NOTE(review): "Host" and "Referer" are fixed to xianyang.meituan.com
    # while citySearchUrl covers several hosts -- confirm callers override.
    # NOTE(review): the Cookie is a hard-coded browser session; it probably
    # expires and arguably should not live in source -- confirm.
    self.headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Connection": "keep-alive",
        # One fragment per cookie; the fragments concatenate to one string.
        "Cookie": (
            "__mta=20812423.1551143861269.1551447027375.1551500101694.5; "
            "iuuid=EE5893B5D2C219A684B1BA25271AB6F834A1DC9634EADB340BFDB6BDA984E46F; "
            "_lxsdk_cuid=1653bf57b9e42-064eff1fa336df-252b1971-100200-1653bf57b9f61; "
            "_lxsdk=EE5893B5D2C219A684B1BA25271AB6F834A1DC9634EADB340BFDB6BDA984E46F; "
            "_hc.v=7ef28d0d-ed51-ad2f-1b7f-662935ef4790.1541148357; "
            "webp=1; "
            "cityname=%E6%B8%AD%E5%8D%97; "
            "i_extend=C_b1Gimthomepagecategory11H__a100005__b1; "
            "__utmz=74597006.1551318637.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); "
            "uuid=a75c37b0cb0b4e8d87a5.1551445379.1.0.0; "
            "ci=354; "
            "rvct=354%2C358%2C356%2C355%2C1155%2C359%2C819%2C772%2C357%2C360%2C352; "
            "__mta=20812423.1551143861269.1551143876409.1551500085679.3"
        ),
        "DNT": "1",
        "Host": "xianyang.meituan.com",
        "Referer": "https://xianyang.meituan.com/s/%E5%95%8A",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.162 Safari/537.36",
    }
lineList[lonIndex], lineList[latIndex] = str(coord['lon']), str( coord['lat']) line = ",".join(lineList) + "\n" return line if __name__ == "__main__": fileName = r'E:\工具\资料\宝鸡\研究\当前任务\fast\宝鸡\大众点评_宝鸡.csv' csvLines = readCsv(fileName) # 确定 lon 和 lat 在表中的列的 位置 lonIndex, latIndex = getLonLatIndex(csvLines[0]) tran = coordinateTranslate.GPS() tranCoord = tran.gcj_encrypt info = [] for i, line in enumerate(csvLines[1:-1]): print(csvLines[i + 1]) csvLines[i + 1] = transCoord(line, lonIndex, latIndex) info.append(csvLines[i + 1]) if i / 100 == 1.0: with open(fileName + ".tran", mode='a+', encoding='gbk', errors='ignore') as f: # 将采集进度写入文件 f.writelines(info) info = []