def __init__(self): self.admin_db = AdminCodeDatabase() self.admin_data = AdminData() self.period = self.admin_db.period self.Province = self.admin_data.Province self.version = list() for y in self.period: self.version.extend(self.admin_db.version(y))
def __init__(self, version=None, year=None): Data.__init__(self) # 设置数据库 self.database = AdminCodeDatabase() # 设置最新版本 self.latestversion = self.database.version()[-1] # 设置版本和年份 if (version is None) and (year is None): self.version = self.latestversion self.year = re.split('_', self.version)[0] elif version is not None: self.version = version self.year = re.split('_', self.version)[0] else: self.year = str(year) self.version = self.database.version(self.year)[-1]
class AdminCodeCheck: def __init__(self): self.admin_db = AdminCodeDatabase() self.admin_data = AdminData() self.period = self.admin_db.period self.Province = self.admin_data.Province self.version = list() for y in self.period: self.version.extend(self.admin_db.version(y)) # 测试省级区划 def admin_checker(self,level='s'): no = 1 for ver in sorted(self.version): self.admin_data.set_version(ver) self.admin_division = self.admin_data[level] admin_division_data = [[p['acode'],p['region']] for p in self.admin_division] one_result = pd.DataFrame(admin_division_data,columns=['acode',ver]) one_result = one_result.set_index('acode') if no == 1: result = one_result else: result = pd.merge(result,one_result,left_index=True,right_index=True,how='outer') no = no + 1 return result # 另一个角度测试,从每个省级开始 def admin_division_checker(self,province=None): result_prefectures = None result_counties = dict() result_counties_alone = None for ver in sorted(self.version): self.admin_data.set_version(ver) # 省级行政区划 province_record = self.admin_data[province] if len(province_record) < 1: continue # 地级行政区划 prefecture_records = self.admin_data[tuple([province_record[0]['region'],'f'])] prefectures = [[p['acode'],p['region']] for p in prefecture_records] one_result = pd.DataFrame(prefectures,columns=['acode',ver]) one_result = one_result.set_index('acode') if result_prefectures is None: result_prefectures = one_result else: result_prefectures = pd.merge(result_prefectures,one_result,left_index=True,right_index=True,how='outer') # 县级行政区划 for pre in prefectures: county_records = self.admin_data[tuple([province_record[0]['region'],pre[1],'f'])] counties = [[p['acode'],p['region']] for p in county_records] one_result = pd.DataFrame(counties,columns=['acode',ver]) one_result = one_result.set_index('acode') if result_counties.get(pre[0]) is None: result_counties[pre[0]] = one_result else: result_counties[pre[0]] = pd.merge(result_counties[pre[0]],one_result,left_index=True,right_index=True,how='outer') county_alone_records = self.admin_data.get_county_children(province=province_record[0]['region'],without_prefecture=True) counties_alone = [[p['acode'],p['region']] for p in county_alone_records] one_result_alone = pd.DataFrame(counties_alone,columns=['acode',ver]) one_result_alone = one_result_alone.set_index('acode') if result_counties_alone is None: result_counties_alone = one_result else: result_counties_alone = pd.merge(result_counties_alone,one_result_alone,left_index=True,right_index=True,how='outer') return {'prefectures':result_prefectures,'counties_with_prefecture':result_counties,'counties_without_prefecture':result_counties_alone}
class AdminData: '''AdminData类表示行政区域数据 :param str version: 颁布的行政区划的版本号 :param int,str year: 颁布的行政区划的年份 :var AdminDatabase database: 行政区划数据库 :var str latestversion: 最新颁布的行政区划版本号 :var str year: 年份 :var str version: 版本号 :var list Province: 某个版本的所有省级行政区划 :var list Prefecture: 某个版本的所有地级行政区划 :var list County: 某个版本的所有县级行政区划 :var list ProvincePrefecture: 某个版本的所有省级和地级行政区划 :var list ProvincePrefectureCounty: 某个版本的所有省级地级县级区划 通用接口__getitem__ :param str,list key: 查询条件。其中的key表示表示区域名称,方法如下: (1)省级区域,用名字直接表示,例如ad[u'北京']; (2)地级区域,用省级、地级名称表示,例如ad[u'浙江',u'嘉兴'];(3)县级区域,用省级、地级、县级名称表示,例如ad[u'湖北', u'恩施',u'来凤']; (4)如果是本级行政区域加上下级行政区域,下级行政区域用f(first)表示,例如表示浙江省及其所有地级行政区域,用ad[u'浙江',u'f']; (5)如果是本级行政区域加上下下级行政区域,下下级行政区域用s(second)表示,例如表示浙江省及其所有县级行政区域,用ad[u'浙江',u's']; (6)如果是本级行政区域加上下级及下下级行政区域,下级及下下级行政区域用b(both)表示,例如表示浙江省及其所有地县级行政区域,用ad[u'浙江',u'b']。返回值是数据库中查询得到的行政区划的列表。 ''' def __init__(self, version=None, year=None): Data.__init__(self) # 设置数据库 self.database = AdminCodeDatabase() # 设置最新版本 self.latestversion = self.database.version()[-1] # 设置版本和年份 if (version is None) and (year is None): self.version = self.latestversion self.year = re.split('_', self.version)[0] elif version is not None: self.version = version self.year = re.split('_', self.version)[0] else: self.year = str(year) self.version = self.database.version(self.year)[-1] # 通用行政区划查询接口 def __getitem__(self, key): '''通用行政区划查询接口 :param key: :return: 查询结果 ''' # f to get all first level # s to get all second level # b to get all first and second level if isinstance(key, str): if re.match('^s$', key): return self.Province if re.match('^t$', key): return self.Prefecture if re.match('^f$', key): return self.County return self.get_province_by_name(key) if isinstance(key, tuple) and len(key) < 2: if re.match('^s$', key[0]): return self.Province if re.match('^t$', key[0]): return self.Prefecture if re.match('^f$', key[0]): return self.County return self.get_province_by_name(key[0]) if isinstance(key, tuple): if len(key) < 3: if re.match(key[1], u'f') is not None: result = self.get_prefecture_children(key[0]) return self._sorted(result) elif re.match(key[1], u's') is not None: result = self.get_county_children(key[0],with_prefecture=False) return self._sorted(result) elif re.match(key[1], u'b') is not None: result = self.get_county_children(key[0],with_prefecture=True) return self._sorted(result) else: return self.get_prefecture_by_name(key[0], key[1]) else: if re.match(key[2], u'f') is not None: result = self.get_county_children(key[0], key[1]) return self._sorted(result) else: return self.get_county_by_name(key[0], key[1], key[2]) def get_by_acode(self, acode, year=None): '''通过行政区划代码(acode)查询行政区划 :param str acode: 行政区划代码 :param int,str year: 年份 :return: 查询结果 :rtype: list ''' if year is None: return list(self.database.find(acode=acode, version=self.version)) else: version = self.database.version(year)[-1] return list(self.database.find(acode=acode, version=version)) def get_province_by_name(self, province): '''通过省级名称查询行政区划 :param str province: 省级行政区划名称 :return: 省级行政区划单位 :rtype: list ''' province_pattern = u'省|市|自治区|维吾尔自治区|回族自治区|壮族自治区' province = re.split(province_pattern, re.sub('\s+', '', province))[0] mprovince = '^' + province + '$' result = [item for item in self.Province if re.match( mprovince, re.split(province_pattern, item['region'])[0]) is not None] if len(result) < 1: return [] return result def get_prefecture_by_name(self, province, prefecture): '''通过省级地级名称查询行政区划 :param str province: 省级行政区划名称 :param str prefecture: 地级行政区划名称 :return: 地级行政区划单位 :rtype: list ''' prefectures = self.get_prefecture_children(province) result = [item for item in prefectures if re.fullmatch( prefecture, item['region']) is not None] if len(result) < 1: result = [item for item in prefectures if re.match(prefecture, item['region']) is not None] if len(result) < 1: return [] return result # 获得一个县级单位 def get_county_by_name(self, province, prefecture, county): '''通过省级地级县级名称查询行政区划 :param str province: 省级行政区划名称 :param str prefecture: 地级行政区划名称 :param str county: 县级行政区划名称 :return: 县级行政区划单位 :rtype: list ''' counties = self.get_county_children(province, prefecture) result = [item for item in counties if re.match( county, item['region']) is not None] if len(result) < 1: return [] return result def get_prefecture_children(self, province): '''通过省级名称获得其辖区下的所有地级行政区划 :param str province: 省级行政区划的名称 :return: 某省级单位辖区下的所有地级区划单位 :rtype: list ''' province_found = self.get_province_by_name(province) if len(province_found) < 1: print('Can not find ', province) raise NameError if len(province_found) > 1: print('Tow much: ', province) raise NameError prefecture = self.database.find(parent=province_found[0][ '_id'], version=self.version, sorts=[('acode', ASCENDING)]) return list(prefecture) def get_county_children(self, province=None, prefecture=None,with_prefecture=False,without_prefecture=False): '''通过省级和地级区域名称获得其辖区下的所有县级行政区划 :param str province: 省级行政区划名称 :param str prefecture: 地级行政区划名称 :return: 某个地级行政单位辖区下的所有县级区划单位 :rtype: list ''' if province is None: print('You must provide a Province Name') raise NameError if prefecture is None: province_id = self.get_province_by_name(province)[0]['_id'] county = list() prefectures = self.get_prefecture_children(province) if len(prefectures) < 1: print('There are no prefectures of : ', province) if not without_prefecture: for p in prefectures: if with_prefecture: county.append(p) county.extend(list(self.database.find(parent=p['_id'], version=self.version, sorts=[('acode', ASCENDING)]))) county.extend(list(self.database.find(grandpa=province_id))) else: # to find prefecture item prefecture_found = self.get_prefecture_by_name(province, prefecture) if len(prefecture_found) < 1: print('Can not find ', province, '.', prefecture) raise NameError if len(prefecture_found) > 1: print('Tow much: ', province, '.', prefecture) print(prefecture_found) raise NameError county = self.database.find(parent=prefecture_found[0][ '_id'], version=self.version, sorts=[('acode', ASCENDING)]) return list(county) # 设置版本 def set_version(self, version): '''设置版本号 :param str version: 颁布的行政区划版本号 :return: 无返回值 ''' self.version = version self.year = re.split('_', self.version)[0] # 设置年份 def set_year(self, year): '''设置年份 :param str year: 年份 :return: 无返回值 ''' self.year = str(year) self.version = self.database.version(self.year)[-1] # 所有的省级单位 @property def Province(self): return self._sorted(list(self.database.find(adminlevel=2, version=self.version))) # 所有的省级单位 @property def Prefecture(self): return self._sorted(list(self.database.find(adminlevel=3, version=self.version))) # 所有的省级单位 @property def County(self): return self._sorted(list(self.database.find(adminlevel=4, version=self.version))) # 获得省级和地级单位 @property def ProvincePrefecture(self): result = [] provinces = self.Province for province in provinces: result.append(province) result.extend(self.get_prefecture_children(province['region'])) return result # 获得省级、地级和县级单位 @property def ProvincePrefectureCounty(self): result = [] provinces = self.Province for province in provinces: result.append(province) prefectures = self.get_prefecture_children(province['region']) for prefecture in prefectures: result.append(prefecture) result.extend(self.get_county_children( province['region'], prefecture['region'])) return result # 辅助排序函数 def _sorted(self, regions): if len(regions) < 1: return [] return sorted(regions, key=lambda x: x['acode'])