예제 #1
0
파일: app.py 프로젝트: leegle/poi
def get_data(city, keyword, coord, key):
    """Crawl every POI matching ``keyword`` inside ``city`` and export to Excel.

    The city's bounding box is looked up, split into rectangular grid cells,
    each cell is queried with ``getpois``, and the merged results are handed
    to ``write_to_excel``.

    Args:
        city: City identifier forwarded to ``area_boundary.getlnglat``.
        keyword: Search keyword forwarded to ``getpois``.
        coord: Forwarded unchanged to ``write_to_excel`` (presumably the
            target coordinate system -- TODO confirm against that helper).
        key: API key used for the boundary lookup and every POI query.

    Returns:
        Whatever ``write_to_excel`` returns.
    """
    # 1. Look up the max/min longitude and latitude of the city boundary.
    max_lng, min_lng, max_lat, min_lat = area_boundary.getlnglat(city, key)

    print('当前城市:', city, "max_lng, min_lng, max_lat, min_lat:", max_lng,
          min_lng, max_lat, min_lat)

    # 2. Build the grid cells. Each cell has the form shown below, e.g.
    #    [[112.23, 23.23, 112.24, 23.22], [112.23, 23.22, 112.24, 23.21]]
    # NOTE: a block of commented-out code that embedded a hard-coded API key
    # used to live here; it was dead code and has been removed so that no
    # credential is kept in the source.
    grids_lib = city_grid.generate_grids(min_lng, max_lat, max_lng, min_lat,
                                         pology_split_distance)

    print('划分后的网格数:', len(grids_lib))
    print(grids_lib)

    all_data = []
    begin_time = time.time()

    print(
        '==========================正式开始爬取啦!!!!!!!!!!!================================'
    )

    # 3. Query each cell in turn and accumulate the results.
    for grid in grids_lib:
        # grid format: [112.23, 23.23, 112.24, 23.22]
        one_pology_data = getpois(grid, keyword, key)

        print('===================================当前矩形范围:', grid, '总共:',
              str(len(one_pology_data)) + "条数据.............................")

        all_data.extend(one_pology_data)

    end_time = time.time()
    print('全部:',
          str(len(grids_lib)) + '个矩形范围', '总的', str(len(all_data)), '条数据, 耗时:',
          str(end_time - begin_time), '正在写入EXCEL中')
    return write_to_excel(all_data, city, keyword, coord)
예제 #2
0
def get_data(city, keyword, coord):
    """Crawl every POI matching ``keyword`` inside ``city``; export CSV and shp.

    The city's bounding box is split into grid cells, each cell is queried
    with ``getpois``, the merged rows are written with ``write_to_csv`` and,
    when that call reports a folder, converted with ``trans_point_to_shp``.

    Args:
        city: City identifier forwarded to ``area_boundary.getlnglat``.
        keyword: Search keyword forwarded to ``getpois``.
        coord: Forwarded unchanged to ``write_to_csv``.

    Returns:
        None.
    """
    # Step 1: bounding box of the city. The key at the head of the key
    # queue is always the one used for this lookup.
    head_key = buffer_keys[0]
    max_lng, min_lng, max_lat, min_lat = area_boundary.getlnglat(
        city, head_key)

    print('当前城市:', city, "max_lng, min_lng, max_lat, min_lat:", max_lng,
          min_lng, max_lat, min_lat)

    # Step 2: slice the bounding box into grid cells.
    cells = city_grid.generate_grids(min_lng, max_lat, max_lng, min_lat,
                                     pology_split_distance)

    print('划分后的网格数:', len(cells))
    print(cells)

    collected = []
    started_at = time.time()

    print(
        '==========================正式开始爬取啦!!!!!!!!!!!================================'
    )

    # Step 3: query the cells one by one.
    for cell in cells:
        # cell format: [112.23, 23.23, 112.24, 23.22]
        cell_pois = getpois(cell, keyword)
        cell_summary = str(len(cell_pois)) + "条数据............................."

        print('===================================当前矩形范围:', cell, '总共:',
              cell_summary)

        collected.extend(cell_pois)

    elapsed = time.time() - started_at
    print('全部:',
          str(len(cells)) + '个矩形范围', '总的', str(len(collected)), '条数据, 耗时:',
          str(elapsed), '正在写入CSV文件中')

    file_folder, file_name = write_to_csv(collected, city, keyword, coord)

    # Step 4: write the shapefile only when the CSV step reported a folder.
    if file_folder is None:
        return
    trans_point_to_shp(file_folder, file_name, 0, 1, pology_split_distance,
                       keyword)
예제 #3
0
def get_drids(min_lng, max_lat, max_lng, min_lat, keyword, key,
              pology_split_distance, all_grids):
    """Recursively split a bounding box into grid cells small enough to crawl.

    The box is cut into cells with ``city_grid.generate_grids``. For each
    cell one ``getpoi_page`` request is made and the ``count`` field of the
    JSON response is inspected. A cell whose count exceeds 890 is subdivided
    again with half the split distance; any other cell is appended to
    ``all_grids``.

    Args:
        min_lng, max_lat, max_lng, min_lat: Corners of the box to split, in
            the same order as the elements of a grid cell.
        keyword: Search keyword forwarded to ``getpoi_page``.
        key: API key forwarded to ``getpoi_page``.
        pology_split_distance: Cell size forwarded to
            ``city_grid.generate_grids``; halved on every recursion level.
        all_grids: List that accepted cells are appended to (mutated in
            place).

    Returns:
        The same ``all_grids`` list that was passed in.
    """
    grids_lib = city_grid.generate_grids(min_lng, max_lat, max_lng, min_lat,
                                         pology_split_distance)

    print('划分后的网格数:', len(grids_lib))
    print(grids_lib)

    # Probe every generated cell to verify that it is small enough; a cell
    # that is too dense has to be split further.
    for grid in grids_lib:
        # The third argument is presumably the page number -- TODO confirm.
        one_pology_data = getpoi_page(grid, keyword, 1, key)
        data = json.loads(one_pology_data)
        print(data)

        # This used to be a ``while`` loop, but ``data`` is never refreshed
        # inside it, so any over-full cell looped (and recursed) forever.
        # One recursive subdivision is enough: the recursive call re-probes
        # each sub-cell itself.
        # NOTE(review): 890 is presumably a safety margin below the API's
        # cap on retrievable results -- confirm against the API docs.
        if int(data['count']) > 890:
            get_drids(grid[0], grid[1], grid[2], grid[3], keyword, key,
                      pology_split_distance / 2, all_grids)
        else:
            # Only cells that are small enough are kept; a subdivided parent
            # is already covered by its sub-cells.
            all_grids.append(grid)
    return all_grids