def get_data(city, keyword, coord, key):
    """Crawl POIs for ``keyword`` across ``city`` and hand them to the Excel writer.

    The city's bounding box is looked up, cut into rectangular grid cells of
    size ``pology_split_distance`` (module-level setting), and every cell is
    queried through ``getpois``. Progress is reported on stdout.

    Args:
        city: City identifier passed to ``area_boundary.getlnglat``.
        keyword: POI search keyword passed to ``getpois``.
        coord: Forwarded unchanged to ``write_to_excel``
            (presumably the target coordinate system - confirm with the writer).
        key: API key used for both the boundary lookup and the POI queries.

    Returns:
        Whatever ``write_to_excel`` returns.
    """
    # Step 1: extreme longitudes / latitudes of the city boundary.
    bounds = area_boundary.getlnglat(city, key)
    max_lng, min_lng, max_lat, min_lat = bounds
    print('当前城市:', city, "max_lng, min_lng, max_lat, min_lat:", max_lng,
          min_lng, max_lat, min_lat)

    # Step 2: slice the bounding box into grid cells. The result is a list of
    # 4-element rectangles, e.g.
    #   [[112.23, 23.23, 112.24, 23.22], [112.23, 23.22, 112.24, 23.21]]
    cells = city_grid.generate_grids(min_lng, max_lat, max_lng, min_lat,
                                     pology_split_distance)
    print('划分后的网格数:', len(cells))
    print(cells)

    # Step 3: query each cell and accumulate the results.
    collected = []
    started_at = time.time()
    print(
        '==========================正式开始爬取啦!!!!!!!!!!!================================'
    )
    for cell in cells:
        cell_pois = getpois(cell, keyword, key)
        cell_summary = str(len(cell_pois)) + "条数据............................."
        print('===================================当前矩形范围:', cell, '总共:',
              cell_summary)
        collected += cell_pois
    finished_at = time.time()

    elapsed = finished_at - started_at
    print('全部:', str(len(cells)) + '个矩形范围', '总的', str(len(collected)),
          '条数据, 耗时:', str(elapsed), '正在写入EXCEL中')

    # Step 4: persist everything in one go.
    return write_to_excel(collected, city, keyword, coord)
def get_data(city, keyword, coord):
    """Crawl POIs for ``keyword`` across ``city``; write a CSV and then a shapefile.

    The API key for the boundary lookup is always the first entry of the
    module-level ``buffer_keys`` sequence. The city's bounding box is cut into
    grid cells of size ``pology_split_distance`` and each cell is queried via
    ``getpois``. Progress is reported on stdout.

    Args:
        city: City identifier passed to ``area_boundary.getlnglat``.
        keyword: POI search keyword passed to ``getpois``.
        coord: Forwarded unchanged to ``write_to_csv``
            (presumably the target coordinate system - confirm with the writer).

    Returns:
        None. Output is produced through ``write_to_csv`` and, when that
        reports a folder, ``trans_point_to_shp``.
    """
    # Step 1: boundary lookup, always with the key at the head of the queue.
    amap_key = buffer_keys[0]
    bounds = area_boundary.getlnglat(city, amap_key)
    max_lng, min_lng, max_lat, min_lat = bounds
    print('当前城市:', city, "max_lng, min_lng, max_lat, min_lat:", max_lng,
          min_lng, max_lat, min_lat)

    # Step 2: slice the bounding box into rectangular grid cells.
    cells = city_grid.generate_grids(min_lng, max_lat, max_lng, min_lat,
                                     pology_split_distance)
    print('划分后的网格数:', len(cells))
    print(cells)

    # Step 3: query each cell; a cell looks like [112.23, 23.23, 112.24, 23.22].
    collected = []
    started_at = time.time()
    print(
        '==========================正式开始爬取啦!!!!!!!!!!!================================'
    )
    for cell in cells:
        cell_pois = getpois(cell, keyword)
        cell_summary = str(len(cell_pois)) + "条数据............................."
        print('===================================当前矩形范围:', cell, '总共:',
              cell_summary)
        collected += cell_pois
    finished_at = time.time()

    elapsed = finished_at - started_at
    print('全部:', str(len(cells)) + '个矩形范围', '总的', str(len(collected)),
          '条数据, 耗时:', str(elapsed), '正在写入CSV文件中')

    # Step 4: write the CSV, then derive the shapefile from it.
    file_folder, file_name = write_to_csv(collected, city, keyword, coord)
    if file_folder is None:
        # The CSV writer reported no output folder, so there is nothing to convert.
        return
    trans_point_to_shp(file_folder, file_name, 0, 1, pology_split_distance,
                       keyword)
def get_drids(min_lng, max_lat, max_lng, min_lat, keyword, key,
              pology_split_distance, all_grids):
    """Recursively subdivide a rectangle until every cell is small enough to crawl.

    The rectangle is cut into cells of size ``pology_split_distance``. For each
    cell the first result page is fetched and its reported ``count`` inspected:
    cells reporting more than 890 results are subdivided again with half the
    split distance, and all other cells are appended to ``all_grids``.

    Args:
        min_lng, max_lat, max_lng, min_lat: Corners of the rectangle to split,
            in the same order a grid cell stores them
            (``[min_lng, max_lat, max_lng, min_lat]``).
        keyword: POI search keyword passed to ``getpoi_page``.
        key: API key passed to ``getpoi_page``.
        pology_split_distance: Cell size handed to ``city_grid.generate_grids``;
            halved on every level of recursion.
        all_grids: List that accepted cells are appended to. Mutated in place.

    Returns:
        The same ``all_grids`` list, for convenience.

    Raises:
        KeyError: If a response carries no ``count`` field.
        ValueError: If a response is not valid JSON or ``count`` is not an
            integer.
    """
    grids_lib = city_grid.generate_grids(min_lng, max_lat, max_lng, min_lat,
                                         pology_split_distance)
    print('划分后的网格数:', len(grids_lib))
    print(grids_lib)

    # Probe each cell with a page-1 request to check whether the cell size is
    # acceptable; oversized cells are split further.
    for grid in grids_lib:
        one_pology_data = getpoi_page(grid, keyword, 1, key)
        data = json.loads(one_pology_data)
        print(data)
        # This used to be a `while` whose condition never changed inside the
        # loop, so any over-threshold cell recursed forever. One check is
        # enough because the recursive call refines the cell all the way down.
        # NOTE(review): 890 presumably sits just below the API's per-polygon
        # result cap - confirm against the provider's documentation.
        if int(data['count']) > 890:
            get_drids(grid[0], grid[1], grid[2], grid[3], keyword, key,
                      pology_split_distance / 2, all_grids)
        else:
            # Only keep cells that were not subdivided; keeping the coarse
            # cell as well would crawl the same area twice.
            all_grids.append(grid)
    return all_grids