def batch_recongnize():
    """Page through ``get_region_by_page`` and dump every region to a dated CSV.

    Paging starts at the module-level ``page`` counter, which is advanced in
    place, and continues until ``page`` exceeds the page total reported by the
    most recent ``get_region_by_page`` call. Each item becomes one row with
    the keys ``departmentId``, ``city``, ``region``, ``department`` and
    ``road``. The rows are written to
    ``<YYYY-MM-DD>_dict_department_region_all.csv``, which is re-created on
    every run (``force=True``).
    """
    global page
    rows = []
    seen = 0
    totalpage = 1  # placeholder until the first response reports the real total

    while page <= totalpage:
        totalpage, items = get_region_by_page(page)
        print(f"【page={page}】")
        print(f"【totalpage={totalpage}】")
        page += 1

        if not (items and totalpage > 0):
            continue

        for entry in items:
            rows.append({
                "departmentId": str(entry['department_id']),
                "city": entry['city'],
                "region": entry['region'],
                "department": entry['title'],
                "road": entry['road'],
            })
            seen += 1
            title = entry['title']
            print(f"【location_str.{seen}={title}】")

    today = time_util.now_to_date("%Y-%m-%d")
    path = f'{today}_dict_department_region_all.csv'
    header = csv_util.get_head_from_arr(rows)
    csv_util.create_csv(path, header, force=True)
    csv_util.append_csv(path, rows)
def extra_region(path='2021-03-29_dict_department_shanghai.csv'):
    """Split the department CSV at *path* into a clean file and an error file.

    Every input row with a non-empty ``小区`` column becomes an address dict
    with the keys ``departmentId`` (the row's position in the input),
    ``city``, ``region``, ``department`` and ``road``. Rows whose ``小区``
    value passes ``has_error`` go to ``dict_department_region_error.csv``;
    all others go to ``dict_department_region.csv``. Both files are
    re-created on every run.

    The ``road`` value is whatever follows the first closing parenthesis of
    the ``道路号`` column. A value with no closing parenthesis is kept whole
    instead of raising ``ValueError`` and aborting the export.

    Args:
        path: CSV file to read; must provide the columns ``小区``, ``城市``,
            ``区域`` and ``道路号``.
    """
    data = csv_util.read_csv2array(path)
    array = []
    error_array = []

    for row_number, item in enumerate(data):
        region = item['小区']
        if not region:
            continue

        road = item['道路号']
        if road:
            # Cut after the first closing parenthesis (ASCII or full-width);
            # cut == -1 keeps the whole string when there is none.
            cut = min(
                (pos for pos in (road.find(")"), road.find("）")) if pos >= 0),
                default=-1,
            )
            # The original replaced ',' with itself (a no-op); normalising
            # the full-width comma is the evident intent.
            road = road[cut + 1:].strip().replace('，', ',')
        else:
            road = ''

        address = {
            'departmentId': row_number,
            'city': item['城市'],
            'region': item['区域'],
            'department': region,
            'road': road,
        }

        if has_error(region):
            error_array.append(address)
        else:
            array.append(address)
            print(f"【main({len(array)}).road={address['city']}.{region}】")

    outputs = (
        ('dict_department_region.csv', array),
        ('dict_department_region_error.csv', error_array),
    )
    for out_path, rows in outputs:
        keys = csv_util.get_head_from_arr(rows)
        print(f"【main().keys={keys}】")
        csv_util.create_csv(out_path, keys, force=True)
        csv_util.append_csv(out_path, rows)
def batch_recongnize():
    """Extract addresses from paged moment texts and append them to a dated CSV.

    Pages are fetched with ``get_moment_by_page`` starting at the module-level
    ``page`` counter, which is advanced in place. Each item's ``content`` has
    its line breaks removed and is collected together with its
    ``content_md5``. All collected texts are then passed to
    ``get_address_by_custom_lac`` in one call. For every result where
    ``getLocFrom`` yields something truthy, a row
    ``{'md5_content', 'address'}`` is produced, with the address parts joined
    by ``/``. Rows are appended to ``address_<YYYY-MM-DD>.csv``; the file is
    created first only when it does not exist yet.
    """
    global page
    texts = []
    digests = []
    counter = 0
    totalpage = 1  # placeholder until the first response reports the real total

    while page <= totalpage:
        totalpage, items = get_moment_by_page(page)
        print(f"【page={page}】")
        print(f"【totalpage={totalpage}】")
        if items and totalpage > 0:
            for moment in items:
                flattened = moment['content'].replace('\n', '').replace('\r', '')
                print(f"【location_str.{counter}={flattened}】")
                counter += 1
                texts.append(flattened)
                digests.append(moment['content_md5'])
        page += 1

    rows = []
    for position, parsed in enumerate(get_address_by_custom_lac(texts)):
        loc_from = getLocFrom(parsed)
        if not loc_from:
            continue
        print(f"【文本内容:{texts[position]}】")
        print(f"【检测到地址:{loc_from}】")
        joined = '/'.join(loc_from)
        print(f"【检测到地址:{joined}】")
        rows.append({'md5_content': digests[position], 'address': joined})

    today = time_util.now_to_date('%Y-%m-%d')
    path = f"address_{today}.csv"
    header = csv_util.get_head_from_arr(rows)
    if not os.path.exists(path):
        csv_util.create_csv(path, header)
    csv_util.append_csv(path, rows)
load_dict = json.load(load_f) return load_dict def extra_region(arr): regions = [] citys = [] for index, region in enumerate(arr): address = f"{region['city']}|{region['region']}" regions.append(address) regions_norepeat = list(set(regions)) # dumps = json.dumps(regions_norepeat, indent=4, ensure_ascii=False) # print(f"【extra_region({len(regions_norepeat)}条).regions_norepeat={dumps}】") return regions_norepeat if __name__ == '__main__': count = 0 # batch_recongnize() arr = read_csv2array("2021-02-08_dict_department_region_all.csv") region_arr = [] for item in arr: if '上海' in item['city'] and not "上海周边" in item['region']: region_arr.append(item) count = count + 1 print(f"【().count={count}】") path = f'{time_util.now_to_date("%Y-%m-%d")}_dict_department_shanghai.csv' keys = csv_util.get_head_from_arr(region_arr) csv_util.create_csv(path, keys, force=True) csv_util.append_csv(path, region_arr)
if __name__ == '__main__':
    # Export every contact that has a phone number from the moments workbook
    # (sheet "20210207") to a dated CSV written with ENCODING_GBK.
    full_dir = FilePathUtil.get_full_dir("./common/20210207wx_contacts_moments.xls")
    array = excel2array(full_dir, "20210207")

    # Source columns used: nick_name, wx_number, phone.
    array_new = [
        {
            "姓名": item['nick_name'],
            "电话": item['phone'],
            "备注": item['wx_number'],
        }
        for item in array
        if item['phone']
    ]

    path = f'{time_util.now_to_date("%Y-%m-%d")}wx_contacts_moments.csv'
    keys = csv_util.get_head_from_arr(array_new)
    csv_util.create_csv(path, keys, force=True, encoding=ENCODING_GBK)
    csv_util.append_csv(path, array_new, encoding=ENCODING_GBK)
def extra_road():
    """Build ``dict_department_road_street.csv`` from the Shanghai department CSV.

    Reads ``2021-03-29_dict_department_shanghai.csv`` and emits one row per
    distinct road name, with the keys ``id`` (the input row's position),
    ``city``, ``region``, ``road`` and ``department``.

    For a row with a non-empty ``road`` column:
      * the text after the first closing parenthesis is taken (the whole
        value when there is none, instead of raising ``ValueError``);
      * it is split on ``,`` or, when it contains no comma, on ``;``;
      * each piece is stripped of parentheses, passed through ``trim_road``,
        and kept only if ``is_road_type`` accepts it, it has not been seen
        before, and it is shorter than 11 characters.

    For a row with an empty ``road`` column the department name itself is
    emitted as the road, and a notice is printed.

    The output file is re-created on every run.
    """
    data = csv_util.read_csv2array("2021-03-29_dict_department_shanghai.csv")
    array = []
    # Only used for membership tests, so a set replaces the original list.
    seen_roads = set()
    # Removes ASCII and full-width parentheses in a single pass. The original
    # chained the same ASCII replace twice; the second pair was evidently
    # meant for the full-width forms.
    drop_parens = str.maketrans('', '', '()（）')

    def new_address(position, row, road_name):
        # Key order matters: it defines the CSV header order.
        return {
            'id': position,
            'city': row['city'],
            'region': row['region'],
            'road': road_name,
            'department': row['department'],
        }

    for index_position, item in enumerate(data):
        road = item['road']

        if not road:
            print(f"没有道路号的小区【{index_position}.{item}】")
            department = item['department']
            seen_roads.add(department)
            array.append(new_address(index_position, item, department))
            continue

        # Cut after the first closing parenthesis (ASCII or full-width);
        # cut == -1 keeps the whole string when there is none.
        cut = min(
            (pos for pos in (road.find(")"), road.find("）")) if pos >= 0),
            default=-1,
        )
        # The original replaced ',' with itself (a no-op); normalising the
        # full-width comma is the evident intent.
        road_str = road[cut + 1:].strip().replace('，', ',')

        if ',' in road_str:
            pieces = road_str.split(',')
        elif ';' in road_str:
            pieces = road_str.split(';')
        else:
            pieces = [road_str]

        for piece in pieces:
            candidate = piece.strip().translate(drop_parens).strip()
            if not candidate:
                continue
            # trim_road may leave parentheses behind, so strip them again.
            candidate = trim_road(candidate).translate(drop_parens)
            if (
                is_road_type(candidate)
                and candidate not in seen_roads
                and len(candidate) < 11
            ):
                seen_roads.add(candidate)
                array.append(new_address(index_position, item, candidate))

    path = 'dict_department_road_street.csv'
    keys = csv_util.get_head_from_arr(array)
    print(f"【main().keys={keys}】")
    csv_util.create_csv(path, keys, force=True)
    csv_util.append_csv(path, array)