def down_load_data_by_polygon():
    """Crawl POI data tile by tile over a fixed rectangle and export it.

    The rectangle is walked in 0.2-wide steps along both axes.  For every
    tile, one ``AmapSpider`` in ``'polygon'`` mode is run per entry of
    ``CONF.types``.  The JSON output is bracketed by
    ``AmapSpider.write_json_before()`` / ``write_json_after()`` and finally
    exported through ``AmapSpider.write_data_to_excel``.
    """
    # Parse the configuration file.
    CONF.parse_from_file('config/config.yml')

    # Rectangle corners; presumably (min lon, max lat, max lon, min lat)
    # -- TODO confirm the axis meaning against AmapSpider.
    x_min, y_max, x_max, y_min = (
        116.34459731000003,
        40.10685308500017,
        116.63942421000012,
        39.80852284500011,
    )

    AmapSpider.write_json_before()

    column = 0
    while True:
        left_edge = x_min + 0.2 * column
        if left_edge >= x_max:
            break

        # Slots 1 and 3 are filled in per row below; the same list object is
        # reused for every row of this column.
        temp_code = [left_edge, 0, left_edge + 0.2, 0]

        row = 0
        while y_min + 0.2 * row < y_max:
            temp_code[3] = y_min + 0.2 * row
            temp_code[1] = y_min + 0.2 + 0.2 * row

            # The separator is empty until a spider reports a non-zero count
            # for this tile.
            hospital_count = 0
            for spider_type in CONF.types:
                connect_char = ',' if hospital_count != 0 else ''
                amap_spider = AmapSpider(
                    CONF.key, spider_type, temp_code, connect_char, 'polygon'
                )
                hospital_count = amap_spider.get_poi_data()

            print(temp_code)
            row += 1

        column += 1

    AmapSpider.write_json_after()
    AmapSpider.write_data_to_excel('temp_data3/{0}'.format('test'))
def patch_data_deduplication():
    """Run the deduplication and dirty-data steps on every ``.xls`` in ``../data``.

    Each matching file name is printed, then handed to
    ``FileProcessorUtil.data_deduplication_hospital`` (target directory
    ``../data_finish``) and ``FileProcessorUtil.dirty_data_output`` (target
    directory ``../data_dirty``).
    """
    CONF.parse_from_file('../config/config.yml')

    source_dir = '../data'
    for entry in os.listdir(source_dir):
        # Only Excel exports are processed; everything else is ignored.
        if not entry.endswith('.xls'):
            continue
        print(entry)
        FileProcessorUtil.data_deduplication_hospital(
            entry, source_dir, '../data_finish')
        FileProcessorUtil.dirty_data_output(entry, source_dir, '../data_dirty')
def patch_data_clean():
    """Call ``FileProcessorUtil.clean_data`` on every ``.xls`` file in ``../data``.

    The same category list is passed to each call, and the path of each
    processed file is printed first.
    """
    CONF.parse_from_file('../config/config.yml')

    # Category names handed to clean_data; presumably a priority order
    # -- verify against FileProcessorUtil.clean_data.
    order_list = [
        '综合医院',
        '专科医院',
        '急救中心',
        '医疗保健服务场所',
        '诊所',
        '疾病预防机构',
        '医药保健销售店',
        '动物医疗场所',
    ]

    for entry in os.listdir('../data'):
        if not entry.endswith('.xls'):
            continue
        target = '../data/' + entry
        print(target)
        FileProcessorUtil.clean_data(target, order_list)
def main():
    """Download data for every city code supplied by ``CityCode``.

    Parses ``config.yml``, prints the configured key, then calls
    ``download_each_city`` once per code returned by
    ``CityCode().fetch_city_code_for_spider()``.
    """
    # Parse the configuration file.
    CONF.parse_from_file('config.yml')
    print(CONF.key)

    for code in CityCode().fetch_city_code_for_spider():
        download_each_city(code)
def down_load_data():
    """Download data for a hard-coded list of district codes.

    Parses ``config/config.yml``, prints the configured key, then calls
    ``download_each_city`` for each code in the active list.
    """
    # Parse the configuration file.
    CONF.parse_from_file('config/config.yml')
    print(CONF.key)

    # Active list: Nanjing.
    nanjing_codes = (
        '320102', '320104', '320105', '320106', '320111', '320113',
        '320114', '320115', '320116', '320117', '320118',
    )
    # Alternative lists kept for reference:
    # Hefei:
    #   '340102','340103','340104','340111','340121','340122','340123','340124','340181'
    # Xuzhou:
    #   '320302','320303','320305','320311','320312','320321','320322','320324','320381','320382'
    # Suzhou:
    #   '320505','320506','320507','320508','320509','320581','320582','320583','320585'

    for code in nanjing_codes:
        download_each_city(code)
def split_data(filename, output_path):
    """Split one CSV file into a separate CSV per city code.

    Reads ``filename`` as UTF-8, then for every key of the module-level
    ``city_dict`` (except two explicitly skipped codes) writes the rows whose
    ``City_Code`` column equals that code to ``<output_path>/<code>.csv``,
    encoded as GBK and without the index column.

    Args:
        filename: Path of the source CSV; it must contain a ``City_Code``
            column comparable to ``int(code)``.
        output_path: Directory receiving the per-city files.  It is created
            (including missing parents) when it does not exist.
    """
    CONF.parse_from_file('../config/config.yml')

    # makedirs(exist_ok=True) handles nested paths and avoids the
    # check-then-create race of exists() + mkdir().
    os.makedirs(output_path, exist_ok=True)

    pd_datas = pd.read_csv(filename, encoding='utf-8')
    print(pd_datas)

    # Codes deliberately left out of the split.
    skipped_codes = {'440800', '510700'}

    city_codes = list(city_dict)
    for city_code in city_codes:
        if city_code in skipped_codes:
            continue
        print(city_code)
        tmp_data = pd_datas[pd_datas['City_Code'] == int(city_code)]
        # os.path.join instead of a literal '\\' so the file lands inside
        # output_path on every platform, not only on Windows.
        tmp_data.to_csv(
            os.path.join(output_path, city_code + '.csv'),
            index=False,
            encoding='gbk',
        )
    print(city_codes)
val = val.split(',') sheet.write(n, index + offset, val[0]) offset = 3 sheet.write(n, index + offset, val[1]) else: sheet.write(n, index + offset, val) wbk.save(name + '.xls') print('保存到excel文件: ' + name + '.xls !') @staticmethod def write_json_before(): with open(CONF.json_name, 'w') as f: f.write('[') @staticmethod def write_json_after(): with open(CONF.json_name, 'a') as f: f.write(']') @staticmethod def write_json_spliter(): with open(CONF.json_name, 'a') as f: f.write(',') if __name__ == '__main__': CONF.parse_from_file('../config.yml') amap_spider = AmapSpider(CONF.key, CONF.types[0], '110107') # amap_spider.get_poi_data() amap_spider.write_data_to_excel('../data/{0}'.format('110107'))
def combine_data():
    """Combine the files under ``../data_finish`` into a single CSV.

    Delegates to ``FileProcessorUtil.combine_path_file`` after parsing the
    configuration file.
    """
    CONF.parse_from_file('../config/config.yml')

    source_dir = r'../data_finish'
    # An earlier run targeted '../China_hospital_finish.csv'.
    combined_csv = '../China_hospital_20200719.csv'
    FileProcessorUtil.combine_path_file(source_dir, combined_csv)