def test_predict_province():
    """Check that the province is filled in for a Qinhuangdao address.

    Two sample addresses are transformed and printed for inspection; the
    assertion is made on the ``string_dataframe`` rendering of the second one.
    """
    samples = [
        "秦皇岛北戴河融合小区11号",
        "秦皇岛北戴河区融合小区11号",
    ]
    print('-' * 42)
    parsed = addressparser.transform(samples)
    print(parsed)

    single = addressparser.transform(['秦皇岛北戴河区融合小区11号'])
    rendered = string_dataframe(single)
    print(rendered)
    assert rendered == '河北省秦皇岛市北戴河区'
def test_transform():
    """Run ``addressparser.transform`` in three modes and check each result.

    Every result is handed to ``assert_addr``, which is defined elsewhere.
    """
    addr_list = [
        "徐汇区虹漕路461号58号楼5楼",
        "泉州市洛江区万安塘西工业区",
        "福建省鼓楼区鼓楼医院",
        "天津市",
    ]

    # Word-segmentation mode (default arguments).
    by_segmentation = addressparser.transform(addr_list)
    assert_addr(by_segmentation)

    # Full-text matching mode.
    by_full_text = addressparser.transform(addr_list, cut=False, pos_sensitive=True)
    assert_addr(by_full_text)

    # Word-segmentation matching, exercising pos_sensitive.
    with_positions = addressparser.transform(addr_list, pos_sensitive=True)
    print(with_positions)
    assert_addr(with_positions, pos_sensitive=True)
def test_error_province():
    """Regression test for wrongly resolved first-level (province) names.

    Each case pairs an input address with the value expected in the ``省``
    column; an empty string means no province is expected.
    """
    cases = [
        ("宁波市江东区兴宁路42弄1号金汇大厦12楼", '浙江省'),
        ("天津空港经济区环河北路80号空港商务园东区", '天津市'),
        ("龙华新区创业路汇海广场C座20楼", ''),  # no match
        ("龙华创业路汇海广场C座20楼", ''),
        ("田林路140号越界创意园16号楼东402室", ''),
        ("上海市浦东新区东方路1365号5号楼24B", '上海市'),
        ("上海浦东商城路1287号1幢5楼", '上海市'),
        ("成都市高新区高朋大道12号府河电器孵化基地B座307 (永丰立交桥西)", '四川省'),
        ("田林路388号1号楼新业大楼1楼西侧102室", ''),
        ("珠江新城广晟国际大厦801室", ''),
    ]
    print('-' * 42)
    parsed = addressparser.transform([address for address, _ in cases])
    print(parsed)

    # Rows are checked in input order.
    for row, (_, province) in enumerate(cases):
        assert parsed.loc[row, '省'] == province
def test_place():
    """Print the parse of an address tied to a cascaded place-name bug.

    No assertion is made; the result is only printed for inspection.
    """
    addresses = ["天津空港经济区环河北路80号空港商务园东区"]
    parsed = addressparser.transform(addresses)
    print(parsed)
def test_error_city_jilin():
    """Print parses of Jilin addresses tied to a second-level (city) name bug.

    No assertion is made; the result is only printed for inspection.
    """
    addresses = [
        "吉林通化市辉南县一中",
        "吉林通化市辉南县11号",
        "吉林省通化市辉南县11号",
        "吉林白山市临江市城区吉林省临江市新市街道鸭绿江花园1号",
        "吉林白山市临江市城区新市街道鸭绿江花园1号",
    ]
    parsed = addressparser.transform(addresses)
    print(parsed)
def parseaddress():
    """Parse the addresses in the JSON request body and return them as JSON.

    The body is read with ``request.get_json()`` and passed to
    ``addressparser.transform``.

    :return: JSON string (non-ASCII characters kept) holding a list of
        ``[province, city, district, address]`` lists. An empty JSON list is
        returned when the body is empty or when parsing raises.
    """
    payload = request.get_json()
    if not payload:
        return json.dumps([], ensure_ascii=False)

    try:
        parsed = addressparser.transform(payload)
    except Exception as err:
        # Best effort: report the failure and answer with an empty list.
        print("parseaddress exception", err)
        return json.dumps([], ensure_ascii=False)

    rows = [
        list(fields)
        for fields in zip(parsed["省"], parsed["市"], parsed["区"], parsed["地址"])
    ]
    return json.dumps(rows, ensure_ascii=False)
def test_error_city_hainan():
    """Print parses of addresses tied to a second-level (city) name bug.

    Most samples are Hainan addresses. No assertion is made; the result is
    only printed for inspection.
    """
    addresses = [
        "河北石家庄市桥西区校区公路11号",
        "海南白沙县金波乡金波乡金眉路2号",
        "海南乐东县九所镇乐东龙栖湾村东侧波波利海岸",
        "海南乐东黎族自治县九所镇乐东龙栖湾村东侧波波利海岸",
        "海南乐东县佛罗镇龙沐湾太阳商城C区7号楼",
        "海南保亭县保城镇七仙岭温泉国家森林公园温泉路8号龙湾雨林谷",
        "海南保亭县响水镇海南省保亭黎族苗族自治县响水镇2224国道西50米",
        "河南宽城县祥和小区22号",
    ]
    parsed = addressparser.transform(addresses)
    print(parsed)
def parse(addresses):
    """
    Split each address into province, city, district and remaining address.

    :param addresses: list of address strings
    :return: list of tab-separated strings, one per input address, in the
        order province, city, district, address; a non-string address value
        is replaced by an empty string
    """
    df = addressparser.transform(addresses, open_warning=False, cut=False)
    lines = []
    for province, city, district, place in zip(df["省"], df["市"], df["区"], df["地址"]):
        # Only the address column is guarded against non-string values.
        detail = place if isinstance(place, str) else ''
        lines.append('\t'.join((province, city, district, detail)))
    return lines
def test_error_province():
    """Print parses of addresses tied to a first-level (province) name bug.

    No assertion is made; the result is only printed for inspection.
    """
    addresses = [
        "宁波市江东区兴宁路42弄1号金汇大厦12楼",
        "天津空港经济区环河北路80号空港商务园东区",
        "龙华新区创业路汇海广场C座20楼",  # no match
        "龙华创业路汇海广场C座20楼",
        "田林路140号越界创意园16号楼东402室",
        "上海浦东商城路1287号1幢5楼",
        "成都市高新区高朋大道12号府河电器孵化基地B座307 (永丰立交桥西)",
        "上海市浦东新区张杨路400号源一创意办公中心D101室",
        "田林路388号1号楼新业大楼1楼西侧102室",
        "上海市浦东新区东方路1365号5号楼24B",
        "珠江新城广晟国际大厦801室",
    ]
    print('-' * 42)
    parsed = addressparser.transform(addresses)
    print(parsed)
def test_error_area():
    """Print parses of addresses tied to a third-level (district) name bug.

    The package version is printed first. No assertion is made.
    """
    print(addressparser.__version__)
    addresses = [
        "北京市昌平区昌平路97号新元科技园B座504",
        "上海经静安区大田路靠近北京西路",
        "成都市高新区天府大道399号天府新谷",
        "青岛市市南区浙江路14号2楼",
        "重庆市渝北区新牌坊一路136号",
        "北京市朝阳区裕民路12号中国国际科技会展中心A座1005",
        "南京市江宁区润发路18号",
        "杭州市西湖区文一西路75号",
        "杭州市下城区朝晖路168号钛合国际A座1204室",
    ]
    print('-' * 42)
    parsed = addressparser.transform(addresses)
    print(parsed)
def test_simple_area():
    """Exercise matching of abbreviated third-level and second-level names.

    No assertion is made; the result is only printed for inspection.
    """
    addresses = [
        "上海市浦东新区虹漕路461号58号楼1楼",
        "上海市浦东区虹漕路461号58号楼2楼",
        "上海市浦东虹漕路461号58号楼3楼",
        "天津滨海祥和小区",
        "天津滨海区祥和小区",
        "天津滨海新区小何小区111号",  # error with "区祥和小区"
        "北京丰台区小何小区111号",
        "孝感安陆小何小区111号",
        "广西南宁市江南区城区南国花园5号",
        "湖南益阳市安化县县城内湖南省益阳市安化县大福镇新桥",
        "广西南宁市江南区城区南国花园5栋",
        "湖北武汉武昌区复兴路1号",
        "山西晋城市城区开发区怡凤小区凤巢小学对面10号楼",
    ]
    parsed = addressparser.transform(addresses)
    print(parsed)
def test_error_area():
    """Check third-level (district) parsing together with match positions.

    The addresses are transformed with ``pos_sensitive=True``. Each row of
    the result is then compared, column by column, with the expected values.
    """
    print(addressparser.__version__)
    print('-' * 42)
    addresses = [
        "北京市昌平区昌平路97号新元科技园B座504",
        "上海经静安区大田路靠近北京西路",
        "青岛市市南区浙江路14号2楼",
        "北京市朝阳区裕民路12号中国国际科技会展中心A座1005",
        "杭州市下城区朝晖路168号钛合国际A座1204室",
    ]
    addr_df = addressparser.transform(addresses, pos_sensitive=True)
    print(addr_df)

    # Column order matches the order in which values are asserted.
    columns = ('省', '市', '区', '地名', '省_pos', '市_pos', '区_pos')
    expected_rows = [
        ('北京市', '北京市', '昌平区', '昌平路97号新元科技园B座504', 0, 0, 3),
        ('上海市', '上海市', '静安区', '经静安区大田路靠近北京西路', 0, 0, 3),
        ('山东省', '青岛市', '市南区', '浙江路14号2楼', -1, 0, 3),
        ('北京市', '北京市', '朝阳区', '裕民路12号中国国际科技会展中心A座1005', 0, 0, 3),
        ('浙江省', '杭州市', '下城区', '朝晖路168号钛合国际A座1204室', -1, 0, 3),
    ]
    for linenum, expected in enumerate(expected_rows):
        for column, value in zip(columns, expected):
            assert addr_df.loc[linenum, column] == value
# -*- coding: utf-8 -*-
"""
@author:XuMing([email protected])
@description: Demo script: parse sample addresses with cut=False and print
    the province / city / district columns.
"""

import addressparser

if __name__ == '__main__':
    samples = [
        "徐汇区虹漕路461号58号楼5楼",
        "泉州市洛江区万安塘西工业区",
        "朝阳区北苑华贸城",
        "上海浦东新区城区昌邑路1669弄7号602(苗圃路口)",
        "湖北天门市渔薪镇湖北省天门市三渔薪镇王湾村六组",
        "收货人:xxx, 地址:湖北恩施州建始县业州镇湖北省建始县桂苑小区二单元111-2, 电话:13593643115",
        "收货人:木鱼, 地址:浙江嘉兴市海宁市许村镇浙江省海宁市许村镇茗山村徐家石桥1号, 电话:13593643115",
    ]
    parsed = addressparser.transform(samples, cut=False)
    print(parsed)

    # One (province, city, district) tuple per input address.
    for triple in zip(parsed["省"], parsed["市"], parsed["区"]):
        print(triple)
""" @author:XuMing([email protected]) @description: """ import addressparser if __name__ == '__main__': location_str = [ "徐汇区虹漕路461号58号楼5楼", "泉州市洛江区万安塘西工业区", "朝阳区北苑华贸城", "襄阳市建锦路丽江泊林小区11栋4单元1楼2号", "上海浦东新区城区昌邑路1669弄7号602(苗圃路口)", "湖北天门市渔薪镇湖北省天门市三渔薪镇王湾村六组", "收货人:xxx, 地址:湖北恩施州建始县业州镇湖北省建始县桂苑小区二单元111-2, 电话:13593643115", "收货人:木鱼, 地址:浙江嘉兴市浙江嘉兴市浙江嘉兴市海宁市许村镇浙江省海宁市许村镇茗山村徐家石桥1号, 电话:13593643115", ] df = addressparser.transform([ "襄阳市建锦路丽江泊林小区11栋4单元1楼2号", "浙江嘉兴市浙江嘉兴市浙江嘉兴市海宁市许村镇浙江省海宁市许村镇茗山村徐家石桥1号", "大红门 / 北京市-丰台区" ], cut=False) # print(type(df)) # print(df) # df = addressparser.transform(location_str, cut=False) # # print(df) # for map_key in zip(df["省"], df["市"], df["区"], df["地址"]): print(list(map_key))
# -*- coding: utf-8 -*-
"""
@author:XuMing([email protected])
@description: Read one address per line from stdin and print, for each, the
    original line followed by its parsed fields, comma-separated.
"""
import sys

sys.path.append('..')
import addressparser

if __name__ == '__main__':
    # Strip surrounding whitespace (including the newline) from every line.
    lines = [raw.strip() for raw in sys.stdin]
    df = addressparser.transform(lines)

    columns = (lines, df["省"], df["市"], df["区"], df["地名"])
    for record in zip(*columns):
        print(','.join(record))
# -*- coding: utf-8 -*-
"""
@author:XuMing([email protected])
@description: Parse the addresses in ../tests/addr.csv, write the combined
    table to processed.csv and render two echarts HTML pages.
"""
import sys

import pandas as pd

sys.path.append('..')
import addressparser
from addressparser import drawer

if __name__ == '__main__':
    source_table = pd.read_csv("../tests/addr.csv")

    # Parse the raw-address column.
    addr_df = addressparser.transform(source_table["#原始地址"])

    # Write the original columns side by side with the parsed ones.
    combined = pd.concat([source_table, addr_df], axis=1)
    combined.to_csv("processed.csv", index=False, encoding="utf-8")

    drawer.echarts_draw(
        addr_df,
        "df_echarts.html",
        title="地域分布图",
        subtitle="location distribute",
    )
    drawer.echarts_cate_draw(addr_df, addr_df['省'], "df_echarts_cate.html")
import os
import sys

import pandas as pd

sys.path.append('..')
import addressparser
from addressparser import drawer

# The fixture CSV sits next to this file; it is parsed once at import time
# and the result is shared by all tests below.
origin_addr = pd.read_csv(
    os.path.join(os.path.dirname(__file__), 'addr.csv')
)
df = addressparser.transform(origin_addr['原始地址'])


def test_draw_locations():
    """Draw a heat map with folium."""
    drawer.draw_locations(df, "df.html")


def test_echarts_draw():
    """Draw a heat map with echarts."""
    drawer.echarts_draw(
        df,
        "df_echarts.html",
        title="地域分布图",
        subtitle="location distribute",
    )


def test_echarts_cate_draw():
    """Draw a categorised scatter plot with echarts."""
    drawer.echarts_cate_draw(df, df['省'], "df_echarts_cate.html")
def test_city_detail_1():
    """Check the rendered parse of a Tianjin Binhai New Area address."""
    parsed = addressparser.transform(['天津滨海新区小何小区111号'])
    rendered = string_dataframe(parsed)
    print(rendered)
    assert rendered == '天津市天津市滨海新区'
def test_city_detail():
    """Check that the abbreviated '浦东' is rendered as '浦东新区'."""
    parsed = addressparser.transform(['上海市浦东虹漕路461号58号楼5楼'])
    rendered = string_dataframe(parsed)
    print(rendered)
    assert rendered == '上海市上海市浦东新区'
# -*- coding: utf-8 -*-
"""
@author:XuMing([email protected])
@description: Demo script: parse sample addresses with default settings and
    print province / city / district, first as tuples, then space-joined.
"""
import sys

sys.path.append('..')
import addressparser

if __name__ == '__main__':
    samples = [
        "徐汇区虹漕路461号58号楼5楼",
        "泉州市洛江区万安塘西工业区",
        "朝阳区北苑华贸城",
        "上海浦东新区城区昌邑路1669弄7号602(苗圃路口)",
        "湖北天门市渔薪镇湖北省天门市三渔薪镇王湾村六组",
        "收货人:xxx, 地址:湖北恩施州建始县业州镇湖北省建始县桂苑小区二单元111-2, 电话:1359",
        "收货人:木鱼, 地址:浙江嘉兴市海宁市许村镇浙江省海宁市许村镇茗山村徐家石桥1号, 电话:135936",
    ]
    parsed = addressparser.transform(samples)
    print(parsed)

    triples = list(zip(parsed["省"], parsed["市"], parsed["区"]))

    # First pass: raw tuples.
    for triple in triples:
        print(triple)

    # Second pass: the same fields joined by a single space.
    for triple in triples:
        print(' '.join(triple))
def test_city_detail_2():
    """Check the rendered parse of an address given as '孝感安陆…'."""
    parsed = addressparser.transform(['孝感安陆小何小区111号'])
    rendered = string_dataframe(parsed)
    print(rendered)
    assert rendered == '湖北省孝感市安陆市'
# -*- coding: utf-8 -*-
"""
@author:XuMing([email protected])
@description: Demo script: parse sample addresses with cut=True and print
    province / city / district, first as tuples, then space-joined.
"""
import sys

sys.path.append('..')
import addressparser

if __name__ == '__main__':
    samples = [
        "徐汇区虹漕路461号58号楼5楼",
        "泉州市洛江区万安塘西工业区",
        "朝阳区北苑华贸城",
        "上海浦东新区城区昌邑路1669弄7号602(苗圃路口)",
        "湖北天门市渔薪镇湖北省天门市三渔薪镇王湾村六组",
        "收货人:xxx, 地址:湖北恩施州建始县业州镇湖北省建始县桂苑小区二单元111-2, 电话:1359",
        "收货人:木鱼, 地址:浙江嘉兴市海宁市许村镇浙江省海宁市许村镇茗山村徐家石桥1号, 电话:135936",
    ]
    parsed = addressparser.transform(samples, cut=True)
    print(parsed)

    triples = list(zip(parsed["省"], parsed["市"], parsed["区"]))

    # First pass: raw tuples.
    for triple in triples:
        print(triple)

    # Second pass: the same fields joined by a single space.
    for triple in triples:
        print(' '.join(triple))
def test_city_detail_3():
    """Print the rendered parse of a Jincheng address.

    No assertion is made; the result is only printed for inspection.
    """
    parsed = addressparser.transform(['山西晋城市城区开发区怡凤小区凤巢小学对面10号楼'])
    rendered = string_dataframe(parsed)
    print(rendered)
# -*- coding: utf-8 -*-
"""
@author:XuMing([email protected])
@description: Demo script: parse sample addresses with cut=False and
    pos_sensitive=True, then print the resulting table.
"""
import sys

sys.path.append('..')
import addressparser

if __name__ == '__main__':
    samples = [
        "徐汇区虹漕路461号58号楼5楼",
        "泉州市洛江区万安塘西工业区",
        "朝阳区北苑华贸城",
        "上海浦东新区城区昌邑路1669弄7号602(苗圃路口)",
        "湖北天门市渔薪镇湖北省天门市三渔薪镇王湾村六组",
        "收货人:xxx, 地址:湖北恩施州建始县业州镇湖北省建始县桂苑小区二单元111-2, 电话:13593643115",
        "收货人:木鱼, 地址:浙江嘉兴市海宁市许村镇浙江省海宁市许村镇茗山村徐家石桥1号, 电话:13593643115",
    ]
    parsed = addressparser.transform(samples, cut=False, pos_sensitive=True)
    print(parsed)