Example #1
0
    def __init__(self, fileName):
        """Set up Baidu Map endpoint URLs, helpers and working-file paths.

        Args:
            fileName: Stored unchanged on ``self.fileName``.

        Side effects:
            Creates a ``UserAgent`` instance and prints the generated
            ``user-agent="..."`` string, calls ``createNewDir.createDir`` for
            ``./tab/baidu_map/`` and reads the GBK-encoded ``name.dat`` from
            the returned directory into ``self.nameList``.
        """
        self.url = 'https://map.baidu.com/'

        # Boundary query.
        self.geoUrl = 'https://map.baidu.com/?newmap=1&reqflag=pcmap&biz=1&from=webmap&da_par=direct&pcevaname=pc4.1&qt=ext&c=233&ext_ver=new&tn=B_NORMAL_MAP&nn=0&ie=utf-8&l=17&uid=3c753948c9a2427235b06c64'

        # POI search.
        self.searchUrl = 'http://api.map.baidu.com/?qt=s&c=131&rn=100&ie=utf-8&oue=1&res=api&wd='
        # Sample request kept for reference:
        #   https://map.baidu.com/?qt=ext&uid=da961f7ddadb6962deee8bb8&ext_ver=new&&nn=0&l=18

        # Search by name to obtain candidate names and their uuids
        # (sample suggestion request with a fixed keyword and bounding box).
        self.wdToUuidUrl = 'https://map.baidu.com/su?wd=%E8%9E%8D%E4%BE%A8%20&cid=233&type=0&newmap=1&b=(12121496.865%2C4038346.77%3B12121862.365%2C4038655.77)&t=1539339239819&pc_ver=2'

        # This URL is then used to check whether a polygon exists.
        self.getGeoUrl = 'https://map.baidu.com/?newmap=1&reqflag=pcmap&biz=1&from=webmap&da_par=direct&pcevaname=pc4.1&qt=ext&c=233&ext_ver=new&tn=B_NORMAL_MAP&nn=0&ie=utf-8&l=17&uid=3c753948c9a2427235b06c64'

        self.fileName = fileName
        self.pois = []  # collected POI data

        # Random User-Agent source. The attribute name ``userAnent`` keeps its
        # original spelling because it is part of the object's interface.
        self.ua = UserAgent()
        self.userAnent = 'user-agent="{}"'.format(self.ua.random)
        print(self.userAnent)

        # Bound converter method: BD09MI -> WGS84.
        self.bd09miTowgs84 = coordinateTranslate.GPS().convert_BD09MI_to_WGS84

        # Working directory and the files kept inside it.
        self.path = baseDir = createNewDir.createDir(r'./tab/baidu_map/')
        self.currFile = baseDir + r'/curr.dat'
        self.nameFile = baseDir + r'/name.dat'
        self.poisFile = baseDir + r'/baidu_poi.csv'

        # Load the name list, one entry per line, trailing newline removed.
        with open(self.nameFile, mode='r', encoding='gbk') as nameSource:
            self.nameList = [entry.strip('\n') for entry in nameSource]
Example #2
0
    def __init__(self, fileName):
        """Set up Baidu Map endpoint URLs, helpers and working-file paths.

        Args:
            fileName: Stored unchanged on ``self.fileName``.

        Side effects:
            Creates a ``UserAgent`` instance and prints the generated
            ``user-agent="..."`` string, calls ``createNewDir.createDir`` for
            ``./tab/baidu_map/`` and reads the GBK-encoded ``name.csv`` from
            the returned directory into ``self.nameList``.
        """
        self.url = 'https://map.baidu.com/'

        # Boundary query.
        self.geoUrl = 'https://map.baidu.com/?newmap=1&reqflag=pcmap&biz=1&from=webmap&da_par=direct&pcevaname=pc4.1&qt=ext&c=233&ext_ver=new&tn=B_NORMAL_MAP&nn=0&ie=utf-8&l=17&uid=3c753948c9a2427235b06c64'

        # POI search.
        self.searchUrl = 'http://api.map.baidu.com/?qt=s&c=131&rn=100&ie=utf-8&oue=1&res=api&wd='
        # Sample request kept for reference:
        #   https://map.baidu.com/?qt=ext&uid=da961f7ddadb6962deee8bb8&ext_ver=new&&nn=0&l=18

        # Search by name to obtain candidate names and their uuids.
        # Sample suggestion request kept for reference:
        #   https://map.baidu.com/su?wd=%E8%9E%8D%E4%BE%A8%20&cid=233&type=0&newmap=1&b=(12121496.865%2C4038346.77%3B12121862.365%2C4038655.77)&t=1539339239819&pc_ver=2
        # self.wdToUuidUrl = 'https://map.baidu.com/su?wd=%E8%9E%8D%E4%BE%A8%20&cid=233&type=0&newmap=1&b=(12121496.865%2C4038346.77%3B12121862.365%2C4038655.77)&t=1539339239819&pc_ver=2'
        self.wdToUuidUrl = 'http://map.baidu.com/su?cid=233&type=0&newmap=1&pc_ver=2&wd='

        # This URL is then used to check whether a polygon exists.
        # self.getGeoUrl = 'https://map.baidu.com/?newmap=1&reqflag=pcmap&biz=1&from=webmap&da_par=direct&pcevaname=pc4.1&qt=ext&c=233&ext_ver=new&tn=B_NORMAL_MAP&nn=0&ie=utf-8&l=17&uid='
        self.getGeoUrl = 'https://map.baidu.com/?newmap=1&reqflag=pcmap&biz=1&from=webmap&da_par=direct&pcevaname=pc4.1&qt=ext&ext_ver=new&tn=B_NORMAL_MAP&nn=0&ie=utf-8&l=17&c=233&uid='

        # URL for fetching detailed POI information. Sample request:
        #   https://map.baidu.com/?ugc_type=3&ugc_ver=1&qt=detailConInfo&device_ratio=1&compat=1&t=1539394773310&uid=827202426e2c6305f33cba83&primaryUid=15840892553483683032&auth=fevy6Dcex31fdLP9AOdODI998f0Z3edJuxHBTBTENxVtComRB199A1GgvPUDZYOYIZuVt1cv3uVtGccZcuVtPWv3GuzEtXzljPaVjyBDEHKOQUWYxcEWe1GD8zv7u%40ZPuVteuztghxehwzJDVD66zJGvpHhOaQD2JKGpt66FUExcc%40AZ
        self.getInfoUrl = 'https://map.baidu.com/?ugc_type=3&ugc_ver=1&qt=detailConInfo&device_ratio=1&compat=1&t=1539394773310&uid='

        # Fetch the POI result list through the search box.
        self.searchBoxUrl = 'https://map.baidu.com/?newmap=1&reqflag=pcmap&biz=1&from=webmap&da_par=direct&pcevaname=pc4.1&qt=s&da_src=searchBox.button&c=231&src=0&wd2=&pn=0&sug=0&l=16&from=webmap&sug_forward=&device_ratio=1&tn=B_NORMAL_MAP&nn=0&ie=utf-8&wd='

        self.fileName = fileName
        self.pois = []  # collected POI data

        # Random User-Agent source. The attribute name ``userAnent`` keeps its
        # original spelling because it is part of the object's interface.
        self.ua = UserAgent()
        self.userAnent = 'user-agent="{}"'.format(self.ua.random)
        print(self.userAnent)

        # Bound converter method: BD09MI -> WGS84.
        self.bd09miTowgs84 = coordinateTranslate.GPS().convert_BD09MI_to_WGS84

        # Working directory and the files kept inside it.
        self.path = baseDir = createNewDir.createDir(r'./tab/baidu_map/')
        self.currFile = baseDir + r'/curr.dat'
        self.nameFile = baseDir + r'/name.csv'
        self.poisFile = baseDir + r'/baidu_poi.csv'

        # Load the name list, one entry per line, trailing newline removed.
        with open(self.nameFile, mode='r', encoding='gbk') as nameSource:
            self.nameList = [entry.strip('\n') for entry in nameSource]
Example #3
0
    def __init__(self):
        """Initialise crawler state: endpoints, user agent and empty buffers.

        Side effects:
            Creates a ``UserAgent`` instance and prints the generated
            ``user-agent="..."`` string; creates a ``ct.GPS`` instance.
        """
        # City-code lookup URL and the file the codes are saved to.
        self.cityCodeUrl = 'https://www.meituan.com/ptapi/getprovincecityinfo/'
        self.cityCodeFile = './cityCode.csv'

        # Random User-Agent source. The attribute name ``userAnent`` keeps its
        # original spelling because it is part of the object's interface.
        self.ua = UserAgent()
        self.userAnent = 'user-agent="{}"'.format(self.ua.random)
        print(self.userAnent)

        self.cityList = []  # list of cities

        # Current city.
        self.city = {}
        self.cityAcronym = ""

        self.cateList = []  # list of categories
        self.areaList = []  # list of sub-areas
        self.openUrlList = []  # URLs that currently need to be collected
        self.PoiInfos = []  # list of detailed POI records

        # # Path and name of the output csv file
        # self.csvFile = createNewDir.createDir(r'../meituan') + self.cityAcronym + '.csv'
        # # Name of the file that stores collection progress
        # self.currFile = createNewDir.createDir(r'../meituan') + self.cityAcronym + '.dat'

        # Coordinate-system conversion: keep the bound gcj_decrypt_exact method.
        converter = ct.GPS()
        self.coordTrans = converter.gcj_decrypt_exact
Example #4
0
    def __init__(self):
        """Initialise crawler state, per-city search URLs and request headers.

        Side effects:
            Creates a ``UserAgent`` instance and prints the generated
            ``user-agent="..."`` string; creates a ``ct.GPS`` instance.
        """
        # City-code lookup URL and the file the codes are saved to.
        self.cityCodeUrl = 'https://www.meituan.com/ptapi/getprovincecityinfo/'
        self.cityCodeFile = './cityCode.csv'

        # Random User-Agent source. The attribute name ``userAnent`` keeps its
        # original spelling because it is part of the object's interface.
        self.ua = UserAgent()
        self.userAnent = 'user-agent="{}"'.format(self.ua.random)
        print(self.userAnent)

        self.cityList = []  # list of cities

        # Current city.
        self.city = {}
        self.cityAcronym = ""

        self.cateList = []  # list of categories
        self.areaList = []  # list of sub-areas
        self.openUrlList = []  # URLs that currently need to be collected
        self.PoiInfos = []  # list of detailed POI records

        # # Path and name of the output csv file
        # self.csvFile = createNewDir.createDir(r'../meituan') + self.cityAcronym + '.csv'
        # # Name of the file that stores collection progress
        # self.currFile = createNewDir.createDir(r'../meituan') + self.cityAcronym + '.dat'
        # # File that stores the URL list
        # self.urlFile = createNewDir.createDir(r'../meituan_url_') + self.cityAcronym + '.csv'

        # Coordinate-system conversion: keep the bound gcj_decrypt_exact method.
        converter = ct.GPS()
        self.coordTrans = converter.gcj_decrypt_exact

        # Search URL prefix for each supported city, keyed by city name.
        self.citySearchUrl = {
            '西安市': 'https://xa.meituan.com/s/',
            '宝鸡市': 'https://baoji.meituan.com/s/',
            '咸阳市': 'https://xianyang.meituan.com/s/',
            '榆林市': 'https://yl.meituan.com/s/',
            '延安市': 'https://yanan.meituan.com/s/',
            u'汉中市': 'https://hanzhong.meituan.com/s/',
            '铜川市': 'https://tc.meituan.com/s/',
            '商洛市': 'https://sl.meituan.com/s/',
            '渭南市': 'https://wn.meituan.com/s/',
            '安康市': 'https://ankang.meituan.com/s/',
        }

        # Static HTTP request headers.
        # NOTE(review): Host and Referer are pinned to yl.meituan.com although
        # citySearchUrl covers several sub-domains - confirm callers override
        # them per city.
        # NOTE(review): a literal session Cookie is hard-coded here; confirm it
        # is still valid and acceptable to keep in source.
        self.header = {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
            "Accept-Encoding": "gzip, deflate, br",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Cookie": "__mta=108425917.1551143452201.1551143452201.1551316904069.2; iuuid=EE5893B5D2C219A684B1BA25271AB6F834A1DC9634EADB340BFDB6BDA984E46F; _lxsdk_cuid=1653bf57b9e42-064eff1fa336df-252b1971-100200-1653bf57b9f61; _lxsdk=EE5893B5D2C219A684B1BA25271AB6F834A1DC9634EADB340BFDB6BDA984E46F; _hc.v=7ef28d0d-ed51-ad2f-1b7f-662935ef4790.1541148357; webp=1; cityname=%E8%A5%BF%E5%AE%89; latlng=34.227458,108.882816,1550806592400; __utmz=74597006.1550806593.4.4.utmcsr=meishi.meituan.com|utmccn=(referral)|utmcmd=referral|utmcct=/i/poi/; i_extend=C_b1Gimthomepagecategory11H__a; _lx_utm=utm_source%3Dmeishi.meituan.com%26utm_medium%3Dreferral%26utm_content%3D%252Fi%252Fpoi%252F; __mta=108425917.1551143452201.1551143452201.1551143452201.1; uuid=95fdf18f9d77473398b1.1551316862.1.0.0; ci=358; rvct=358%2C356%2C359%2C819%2C772%2C357%2C355%2C360%2C352%2C354%2C353",
            "DNT": "1",
            "Host": "yl.meituan.com",
            "Pragma": "no-cache",
            "Referer": "https://yl.meituan.com/s/%E4%BA%8C%E9%A9%AC%E5%B8%88%E5%82%85/",
            "Upgrade-Insecure-Requests": "1",
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.162 Safari/537.36",
        }
Example #5
0
功能描述: 百度地图 电子边框


"""
import time, json, os
from fake_useragent import UserAgent
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import coordinateTranslate

# Initialise the module-level coordinate-system conversion object
# (used by miToGPS).
coordTrans = coordinateTranslate.GPS()


def miToGPS(lon, lat):
    """Convert a BD09MI coordinate pair to WGS84.

    Both arguments are passed through ``float()`` and handed to the
    module-level ``coordTrans.convert_BD09MI_to_WGS84``; whatever that call
    returns is returned unchanged.
    """
    return coordTrans.convert_BD09MI_to_WGS84(float(lon), float(lat))


def getUserAgent():
    """Return the string ``user-agent="<UA>"`` built from a random User-Agent.

    A fresh ``UserAgent`` instance is created on every call and its
    ``random`` value is embedded between the double quotes.
    """
    return 'user-agent="{}"'.format(UserAgent().random)


def seleniumChromeInit():
    # 模拟创建一个浏览器对象,然后可以通过对象去操作浏览器
Example #6
0
    def __init__(self):
        """Initialise crawler state, per-city search URLs and request headers.

        Side effects:
            Creates a ``UserAgent`` instance and prints the generated
            ``user-agent="..."`` string; creates a ``ct.GPS`` instance.
        """
        # City-code lookup URL and the file the codes are saved to.
        self.cityCodeUrl = 'https://www.meituan.com/ptapi/getprovincecityinfo/'
        self.cityCodeFile = './cityCode.csv'

        # Random User-Agent source. The attribute name ``userAnent`` keeps its
        # original spelling because it is part of the object's interface.
        self.ua = UserAgent()
        self.userAnent = 'user-agent="{}"'.format(self.ua.random)
        print(self.userAnent)

        self.cityList = []  # list of cities

        # Current city.
        self.city = {}
        self.cityAcronym = ""

        self.cateList = []  # list of categories
        self.areaList = []  # list of sub-areas
        self.openUrlList = []  # URLs that currently need to be collected
        self.PoiInfos = []  # list of detailed POI records

        # # Path and name of the output csv file
        # self.csvFile = createNewDir.createDir(r'../meituan') + self.cityAcronym + '.csv'
        # # Name of the file that stores collection progress
        # self.currFile = createNewDir.createDir(r'../meituan') + self.cityAcronym + '.dat'
        # # File that stores the URL list
        # self.urlFile = createNewDir.createDir(r'../meituan_url_') + self.cityAcronym + '.csv'

        # Coordinate-system conversion: keep the bound gcj_decrypt_exact method.
        converter = ct.GPS()
        self.coordTrans = converter.gcj_decrypt_exact

        # Search URL prefix for each supported city, keyed by city name.
        self.citySearchUrl = {
            '西安市': 'https://xa.meituan.com/s/',
            '宝鸡市': 'https://baoji.meituan.com/s/',
            '咸阳市': 'https://xianyang.meituan.com/s/',
            '榆林市': 'https://yl.meituan.com/s/',
            '延安市': 'https://yanan.meituan.com/s/',
            u'汉中市': 'https://hanzhong.meituan.com/s/',
            '铜川市': 'https://tc.meituan.com/s/',
            '商洛市': 'https://sl.meituan.com/s/',
            '渭南市': 'https://wn.meituan.com/s/',
            '安康市': 'https://ankang.meituan.com/s/',
        }

        # Static HTTP request headers; the Cookie entry is currently disabled.
        # NOTE(review): Host is pinned to www.meituan.com while citySearchUrl
        # points at per-city sub-domains - confirm callers override it.
        self.headers = {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
            "Accept-Encoding": "gzip, deflate, br",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Cache-Control": "max-age=0",
            "Connection": "keep-alive",
            # "Cookie": "__mta=146243211.1551452178822.1551452178822.1551452270619.2; uuid=63ead3fae1b34c05957f.1551452154.1.0.0; ci=359; rvct=359; _lxsdk_cuid=16939c187b4c8-0f2c4d416da6c-42017773-100200-16939c187b460; _lxsdk_s=16939c187b6-557-aef-f46%7C%7C11",
            "Host": "www.meituan.com",
            "Upgrade-Insecure-Requests": "1",
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.162 Safari/537.36 Avast/65.0.411.162",
        }

        # Counter starting at zero; presumably a recursion-depth counter
        # ("digui" is pinyin for recursion) - verify against its users.
        self.diguiCount = 0
Example #7
0
    def __init__(self):
        """Initialise crawler state, per-city search URLs and request headers.

        Side effects:
            Creates a ``UserAgent`` instance and prints the generated
            ``user-agent="..."`` string; creates a ``ct.GPS`` instance.
        """
        # City-code lookup URL and the file the codes are saved to.
        self.cityCodeUrl = 'https://www.meituan.com/ptapi/getprovincecityinfo/'
        self.cityCodeFile = './cityCode.csv'

        # Random User-Agent source. The attribute name ``userAnent`` keeps its
        # original spelling because it is part of the object's interface.
        self.ua = UserAgent()
        self.userAnent = 'user-agent="{}"'.format(self.ua.random)
        print(self.userAnent)

        self.cityList = []  # list of cities

        # Current city.
        self.city = {}
        self.cityAcronym = ""

        self.cateList = []  # list of categories
        self.areaList = []  # list of sub-areas
        self.openUrlList = []  # URLs that currently need to be collected
        self.PoiInfos = []  # list of detailed POI records

        # # Path and name of the output csv file
        # self.csvFile = createNewDir.createDir(r'../meituan') + self.cityAcronym + '.csv'
        # # Name of the file that stores collection progress
        # self.currFile = createNewDir.createDir(r'../meituan') + self.cityAcronym + '.dat'
        # # File that stores the URL list
        # self.urlFile = createNewDir.createDir(r'../meituan_url_') + self.cityAcronym + '.csv'

        # Coordinate-system conversion: keep the bound gcj_decrypt_exact method.
        converter = ct.GPS()
        self.coordTrans = converter.gcj_decrypt_exact

        # Search URL prefix for each supported city, keyed by city name.
        self.citySearchUrl = {
            '西安市': 'https://xa.meituan.com/s/',
            '宝鸡市': 'https://baoji.meituan.com/s/',
            '咸阳市': 'https://xianyang.meituan.com/s/',
            '榆林市': 'https://yl.meituan.com/s/',
            '延安市': 'https://yanan.meituan.com/s/',
            u'汉中市': 'https://hanzhong.meituan.com/s/',
            '铜川市': 'https://tc.meituan.com/s/',
            '商洛市': 'https://sl.meituan.com/s/',
            '渭南市': 'https://wn.meituan.com/s/',
            '安康市': 'https://ankang.meituan.com/s/',
        }

        # Static HTTP request headers.
        # NOTE(review): Host and Referer are pinned to xianyang.meituan.com
        # although citySearchUrl covers several sub-domains - confirm callers
        # override them per city.
        # NOTE(review): a literal session Cookie is hard-coded here; confirm it
        # is still valid and acceptable to keep in source.
        self.headers = {
            "Accept":
                "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
            "Accept-Encoding":
                "gzip, deflate, br",
            "Accept-Language":
                "zh-CN,zh;q=0.9",
            "Connection":
                "keep-alive",
            "Cookie":
                "__mta=20812423.1551143861269.1551447027375.1551500101694.5; iuuid=EE5893B5D2C219A684B1BA25271AB6F834A1DC9634EADB340BFDB6BDA984E46F; _lxsdk_cuid=1653bf57b9e42-064eff1fa336df-252b1971-100200-1653bf57b9f61; _lxsdk=EE5893B5D2C219A684B1BA25271AB6F834A1DC9634EADB340BFDB6BDA984E46F; _hc.v=7ef28d0d-ed51-ad2f-1b7f-662935ef4790.1541148357; webp=1; cityname=%E6%B8%AD%E5%8D%97; i_extend=C_b1Gimthomepagecategory11H__a100005__b1; __utmz=74597006.1551318637.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); uuid=a75c37b0cb0b4e8d87a5.1551445379.1.0.0; ci=354; rvct=354%2C358%2C356%2C355%2C1155%2C359%2C819%2C772%2C357%2C360%2C352; __mta=20812423.1551143861269.1551143876409.1551500085679.3",
            "DNT":
                "1",
            "Host":
                "xianyang.meituan.com",
            "Referer":
                "https://xianyang.meituan.com/s/%E5%95%8A",
            "Upgrade-Insecure-Requests":
                "1",
            "User-Agent":
                "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.162 Safari/537.36",
        }
Example #8
0
    lineList[lonIndex], lineList[latIndex] = str(coord['lon']), str(
        coord['lat'])
    line = ",".join(lineList) + "\n"
    return line


if __name__ == "__main__":
    # Script entry point: read a CSV, convert the lon/lat columns of every
    # data row and append the converted rows to "<fileName>.tran" in batches.

    fileName = r'E:\工具\资料\宝鸡\研究\当前任务\fast\宝鸡\大众点评_宝鸡.csv'

    csvLines = readCsv(fileName)

    # Determine which columns of the table hold lon and lat (from the header).
    lonIndex, latIndex = getLonLatIndex(csvLines[0])

    tran = coordinateTranslate.GPS()
    # Kept as a module-level name; presumably read by transCoord - verify.
    tranCoord = tran.gcj_encrypt

    # Number of converted rows buffered before each append to the output file.
    batchSize = 100

    # NOTE(review): the slice skips the header row and also the final row;
    # confirm the last row is meant to be excluded.
    dataLines = csvLines[1:-1]
    lastIndex = len(dataLines) - 1

    info = []
    for i, line in enumerate(dataLines):
        print(csvLines[i + 1])
        csvLines[i + 1] = transCoord(line, lonIndex, latIndex)
        info.append(csvLines[i + 1])
        # The previous test `i / 100 == 1.0` was true only for i == 100, so the
        # file was written at most once and all later rows were dropped.
        # Flush on every full batch and on the final row instead.
        if len(info) >= batchSize or i == lastIndex:
            with open(fileName + ".tran",
                      mode='a+',
                      encoding='gbk',
                      errors='ignore') as f:
                # Append the converted rows gathered so far.
                f.writelines(info)
            info = []