Esempio n. 1
0
    def __init__(self):
        """Create the database/data helpers and collect every known version.

        After construction ``self.version`` is one flat list that holds, for
        each entry of ``self.admin_db.period`` (in order), the items returned
        by ``self.admin_db.version(entry)``.
        """
        self.admin_db = AdminCodeDatabase()
        self.admin_data = AdminData()

        # Shortcuts to attributes exposed by the two helpers.
        self.period = self.admin_db.period
        self.Province = self.admin_data.Province

        # Flatten the per-period version collections into a single list.
        self.version = [
            ver
            for y in self.period
            for ver in self.admin_db.version(y)
        ]
Esempio n. 2
0
 def __init__(self):
     """Initialise the base class, connect to the data store and create
     the ``AdminData`` helper kept in ``self.ad``."""
     Database.__init__(self)

     # NOTE(review): presumably (database name, collection name) - confirm
     # against Database.connect, which is not visible here.
     database_name, collection_name = 'regionDB', 'CityStatistics'
     self.connect(database_name, collection_name)

     self.ad = AdminData()
Esempio n. 3
0
class CityStatDatabase(Database):
    '''Query helper for the city statistics collection.

    On construction the instance connects with ``('regionDB', 'CityStatistics')``
    and creates an ``AdminData`` helper (``self.ad``) that is used to turn
    region selectors into administrative codes.
    '''
    def __init__(self):
        Database.__init__(self)
        self.connect('regionDB','CityStatistics')
        self.ad = AdminData()

    def find(self,conds,is_to_standard_form=True):
        '''Query regional data from the city statistics collection.

        ``conds`` is left untouched; the method works on a shallow copy.
        Besides ordinary field filters it may contain these special keys:

        * ``projection`` - projection passed to ``collection.find``; a default
          projection is used when it is missing or ``None``.
        * ``sorts`` - sort specification; defaults to year, then acode,
          both ascending.
        * ``year`` - a single year or a list/tuple of years; when missing or
          ``None`` every year in which the requested variables occur is used.
        * ``region`` - iterable of region selectors.  Each selector is turned
          into a tuple and resolved through ``self.ad[...]`` separately for
          every queried year.

        Every other key starting with ``region`` is skipped.  A list value is
        translated to an ``$in`` filter, anything else is matched as is.

        :param dict conds: query conditions
        :param bool is_to_standard_form: when true the rows are passed through
            ``RegionFormat(...).transform()``; otherwise the de-duplicated
            ``DataFrame`` is returned directly
        :return: the transformed result or a ``pandas.DataFrame``
        '''
        # Work on a copy: the special keys are removed below and the caller's
        # dictionary must not change as a side effect of querying.
        conds = dict(conds)

        projection = conds.pop('projection',None)
        if projection is None:
            projection = {'region':1,'year':1,'value':1,'acode':1,'_id':0,'variable':1,'scale':1}

        # Sort order.
        sorts = conds.pop('sorts',None)
        if sorts is None:
            sorts = [('year',ASCENDING),('acode',ASCENDING)]

        # Years to query.
        period = conds.pop('year',None)
        if period is None:
            # Only ask the database for all variables when none were given.
            variables = conds['variable'] if 'variable' in conds else self.variables
            period = self.period(variables)

        # Translate the remaining conditions into a Mongo filter.
        conditions = dict()
        for key,value in conds.items():
            if re.match('region',key) is not None:
                continue
            if isinstance(value,list):
                conditions[key] = {'$in':value}
            else:
                conditions[key] = value

        # Region handling: administrative codes are resolved per year, so the
        # collection is queried once for each year.
        if 'region' in conds:
            # A single year is accepted as well as a list of years.
            years = list(period) if isinstance(period,(list,tuple)) else [period]
            rows = []
            for year in years:
                conditions['year'] = year
                self.ad.set_year(year)
                conditions['acode'] = {'$in':[region['acode'] for item in conds['region'] for region in self.ad[tuple(item)]]}
                rows.extend(self.collection.find(conditions,projection).sort(sorts))
            result_found = pd.DataFrame(rows)
        else:
            if isinstance(period,(list,tuple)):
                conditions['year'] = {'$in':list(period)}
            else:
                conditions['year'] = period
            result_found = pd.DataFrame(list(self.collection.find(conditions,projection).sort(sorts)))

        # ``keep='last'`` is the supported spelling of the former
        # ``take_last=True`` keyword.
        result = result_found.drop_duplicates(keep='last')

        if is_to_standard_form:
            return RegionFormat(result).transform()
        return result

    @property
    def variables(self):
        '''Distinct values of the ``variable`` field in the collection.'''
        return self.collection.find().distinct('variable')

    def period(self,variable):
        '''Return the sorted years for which the given variable(s) have data.

        :param str,list variable: one variable name or an iterable of names
        :return: sorted distinct ``year`` values
        :rtype: list
        '''
        if isinstance(variable,str):
            posts = self.collection.find({'variable':variable}).distinct('year')
        else:
            # Union of the years of every requested variable.
            posts = set()
            for var in variable:
                posts.update(self.collection.find({'variable':var}).distinct('year'))
            posts = list(posts)
        return sorted(posts)
Esempio n. 4
0
class AdminCodeCheck:
    '''Build side-by-side tables of administrative divisions across versions.

    Every table is indexed by ``acode`` and has one column per version that
    holds the region name recorded for that code in that version.
    '''
    def __init__(self):
        self.admin_db = AdminCodeDatabase()
        self.admin_data = AdminData()
        self.period = self.admin_db.period

        self.Province = self.admin_data.Province

        # Flat list of all versions of all periods.
        self.version = list()
        for y in self.period:
            self.version.extend(self.admin_db.version(y))

    @staticmethod
    def _records_to_frame(records,ver):
        '''Return a frame indexed by ``acode`` whose only column ``ver`` holds the region names.'''
        rows = [[rec['acode'],rec['region']] for rec in records]
        return pd.DataFrame(rows,columns=['acode',ver]).set_index('acode')

    @staticmethod
    def _outer_merge(accumulated,frame):
        '''Outer-join ``frame`` onto ``accumulated`` by index; the first frame is returned unchanged.'''
        if accumulated is None:
            return frame
        return pd.merge(accumulated,frame,left_index=True,right_index=True,how='outer')

    # Check the divisions of one level across all versions.
    def admin_checker(self,level='s'):
        '''Compare the divisions selected by ``level`` across all versions.

        For every version (in sorted order) ``self.admin_data[level]`` is read
        and stored in ``self.admin_division``; the resulting frames are
        outer-joined on ``acode``.

        :param level: key passed to ``self.admin_data[...]``
        :return: merged ``DataFrame``, or ``None`` when there are no versions
        '''
        # Start from None so that an empty version list does not leave the
        # result unbound.
        result = None
        for ver in sorted(self.version):
            self.admin_data.set_version(ver)
            self.admin_division = self.admin_data[level]
            result = self._outer_merge(result,self._records_to_frame(self.admin_division,ver))

        return result

    # The same check from another angle: starting from a single province.
    def admin_division_checker(self,province=None):
        '''Compare the divisions below one province across all versions.

        Versions in which ``self.admin_data[province]`` is empty are skipped.

        :param province: key passed to ``self.admin_data[...]`` to find the
            province record
        :return: dict with the keys ``'prefectures'`` (frame or ``None``),
            ``'counties_with_prefecture'`` (dict mapping a prefecture acode
            to its frame) and ``'counties_without_prefecture'`` (frame or
            ``None``)
        '''
        result_prefectures = None
        result_counties = dict()
        result_counties_alone = None

        for ver in sorted(self.version):
            self.admin_data.set_version(ver)
            # Province record.
            province_record = self.admin_data[province]

            if len(province_record) < 1:
                continue
            province_name = province_record[0]['region']

            # Prefecture level; materialised because it is iterated twice.
            prefecture_records = list(self.admin_data[tuple([province_name,'f'])])
            result_prefectures = self._outer_merge(
                result_prefectures,self._records_to_frame(prefecture_records,ver))

            # County level, one table per prefecture.
            for pre in prefecture_records:
                county_records = self.admin_data[tuple([province_name,pre['region'],'f'])]
                result_counties[pre['acode']] = self._outer_merge(
                    result_counties.get(pre['acode']),self._records_to_frame(county_records,ver))

            # Counties returned by get_county_children(without_prefecture=True).
            county_alone_records = self.admin_data.get_county_children(province=province_name,without_prefecture=True)
            # The first version must seed the table with its own frame; the
            # previous code seeded it with the last per-prefecture county
            # frame by mistake.
            result_counties_alone = self._outer_merge(
                result_counties_alone,self._records_to_frame(county_alone_records,ver))

        return {'prefectures':result_prefectures,'counties_with_prefecture':result_counties,'counties_without_prefecture':result_counties_alone}
Esempio n. 5
0
 def __init__(self,data):
     """Initialise the formatter with ``data``.

     The three steps are order-dependent: the ``Format`` base class is
     initialised with ``data`` first, then the ``AdminData`` helper is
     created, and finally ``to_set_type()`` is called.
     NOTE(review): ``to_set_type`` presumably relies on state set up by
     ``Format.__init__`` - confirm against the ``Format`` class.
     """
     Format.__init__(self,data)
     self.ad = AdminData()
     self.to_set_type()
Esempio n. 6
0
class RegionFormat(Format):
    '''Convert regional data between layouts.

    The methods read the columns ``year``, ``acode``, ``region``,
    ``variable`` and ``value`` (and ``scale`` when present) from
    ``self._data``.
    NOTE(review): ``self._data`` is presumably set by ``Format.__init__``
    from ``data`` - confirm against the ``Format`` class.

    :param data: regional data in stacked form
    '''
    def __init__(self,data):
        Format.__init__(self,data)
        self.ad = AdminData()
        self.to_set_type()

    # Format conversion entry point.
    def transform(self,sourcetype='stack',targettype='normal',connect='outer'):
        '''Convert the data from ``sourcetype`` to ``targettype``.

        Only the stack -> normal conversion is implemented; for every other
        combination the method returns ``None``.

        :param str sourcetype: source layout, must start with ``'stack'``
        :param str targettype: target layout, must start with ``'normal'``
        :param str connect: join type forwarded to :meth:`stack_to_normal`
        :return: result of :meth:`stack_to_normal` or ``None``
        '''
        if (re.match('stack',sourcetype) is not None) and (re.match('normal',targettype) is not None):
            return self.stack_to_normal(connect=connect)

    def stack_to_normal(self,connect):
        '''Convert regional data from the stacked to the normal layout.

        Depending on the dimensions of the data the result is

        * a time series (one variable, one region, no multiple scales):
          ``{'tags':..., 'data': Series}``;
        * a cross-section (a single year): ``{'tags':..., 'data': DataFrame}``
          indexed by acode with a ``region`` column followed by one column per
          variable (``variable_scale`` when several scales are present);
        * panel data otherwise: ``{'data':..., 'pdata':..., 'balanceddata':...}``
          plus ``'tags'`` when a single scale is present.

        :param str connect: how the per-variable columns of a cross-section
            are joined, e.g. ``'inner'`` or ``'outer'``; panel data is always
            joined with ``'outer'``
        :return: dict
        '''
        scale = self.ndim.get('scale')
        multi_scale = (scale is not None) and scale > 1

        # Several scales present: columns are keyed by variable and scale.
        if multi_scale:
            by = ['variable','scale']
            # Cross-section (region | variable).
            if self.ndim['year'] == 1:
                _,g1 = next(iter(self._data.groupby('year', sort=True)))
                mdata = self._wide_frame(g1,by,connect)
                # A dict, consistent with the tags of the other branches.
                tags = {'year':self.year[0]}
                return {'tags':tags,'data':mdata}

            # Panel data.
            result,mdata,mdata2 = self._panel(by)
            return {'data':mdata,'pdata':result,'balanceddata':mdata2}

        # No scale column, or a single scale.
        by = 'variable'

        # Time series.
        if (self.ndim['variable'] == 1) and (self.ndim['region'] == 1):
            tags = {'variable':self.variable[0],'region':self.ad.get_by_acode(self.acode[0])[0]['region']}
            _,first = next(iter(self._data.groupby(['acode','variable'],sort=True)))
            result = pd.Series(dict(zip(self.year,first['value'])))
            if scale is not None:
                tags['scale'] = self.scale[0]
            return {'tags':tags,'data':result}

        # Cross-section (region | variable); honours ``connect`` like the
        # multi-scale cross-section does.
        if self.ndim['year'] == 1:
            _,g1 = next(iter(self._data.groupby('year', sort=True)))
            mdata = self._wide_frame(g1,by,connect)
            tags = {'year':self.year[0]}
            if scale is not None:
                tags['scale'] = self.scale[0]
            return {'tags':tags,'data':mdata}

        # Panel data.
        result,mdata,mdata2 = self._panel(by)
        if scale is not None:
            tags = {'scale':self.scale}
            return {'tags':tags,'data':mdata,'pdata':result,'balanceddata':mdata2}
        return {'data':mdata,'pdata':result,'balanceddata':mdata2}

    @staticmethod
    def _wide_frame(frame,by,how):
        '''Pivot stacked rows into an acode-indexed frame with one column per group of ``by``.'''
        region_dict = dict(zip(frame['acode'],frame['region']))
        wide = None
        for name,data in frame.groupby(by, sort=True):
            # Multi-key groups are labelled "variable_scale".
            label = '_'.join(name) if isinstance(name,tuple) else name
            column = pd.DataFrame({label:data['value'].values},index=data['acode'].values)
            if wide is None:
                wide = column
            else:
                wide = pd.merge(wide,column,left_index=True,right_index=True,how=how)
        wide.insert(0, 'region', [region_dict[code] for code in wide.index])
        return wide

    def _panel(self,by):
        '''Return ``(panel, long frame, balanced long frame)`` built from one wide frame per year.'''
        years = []
        frames = []
        for y,g1 in self._data.groupby('year', sort=True):
            years.append(str(y))
            frames.append(self._wide_frame(g1,by,'outer'))
        # NOTE(review): ``pd.Panel`` is not available in recent pandas
        # releases; this branch needs a pandas version that still ships it.
        result = pd.Panel(dict(zip(years,frames)))
        mdata = self._panel_to_frame(result)
        mdata2 = self._panel_to_frame(result.dropna(axis=1))
        return result,mdata,mdata2

    @staticmethod
    def _panel_to_frame(panel):
        '''Reorder the panel axes and flatten the panel with ``to_frame(False)``.'''
        swapped = panel.swapaxes('items','minor')
        swapped = swapped.swapaxes('major','minor')
        return swapped.to_frame(False)

    # Helper that derives the structure of the data.
    def to_set_type(self):
        '''Derive the dimension attributes of the regional data.

        Sets ``self.year`` (as strings), ``self.acode``,
        ``self.number_of_region``, ``self.variable``, ``self.ndim`` and, when
        a ``scale`` column exists, ``self.scale``.

        :return: None
        '''
        # Grouping by a plain column name (not a one-element list) keeps the
        # group keys scalar on every pandas version.
        # Years.
        g = self._data.groupby('year', sort=True)
        self.year = [str(name) for name, group in g]

        # Administrative codes.
        g = self._data.groupby('acode', sort=True)
        self.acode = [name for name, group in g]
        self.number_of_region = len(g.groups)

        # Variables.
        g = self._data.groupby('variable', sort=True)
        self.variable = [name for name, group in g]

        # Size of every dimension.
        self.ndim = {'year':len(self.year),'variable':len(self.variable),'region':self.number_of_region}

        # Scale column (per the original comment: whole city vs. city districts).
        if 'scale' in self._data.columns:
            g = self._data.groupby('scale',sort=True)
            self.scale = [name for name, group in g]
            self.ndim['scale'] = len(self.scale)
Esempio n. 7
0
class Layout:
    '''
    Convert stacked regional data into "normal" (wide) layouts.

    Attributes:
    self.ad: an AdminData instance, used to look up region names by code
    self._data: the input data
    self.year: the years in the data, a list of str
    self.acode: the administrative codes of the regions, a list
    self.region: the region names, a list
    self.variable: the variables in the data, a list
    self.tags: the dict returned by _type()
    self.ndim: the number of distinct years, variables and regions, a dict

    Methods:
    __init__(self,data=None): constructor, performs the initial setup.
    _type(self)->dict: helper that derives the structure of the data.
    stackToNormal(self): convert the data from stack to normal layout.

    Demo (taken from the original documentation):
    convert regional data from stack to normal
    ad = AdminCode()
    rdata = RegionalData()    # build a RegionalData instance
    mdata = rdata.query(region=ad[u'浙江',u'f'],variable=[u'财政支出'],year=2012)    # query Zhejiang, fiscal expenditure
    lout = Layout(mdata)    # convert the layout
    print(lout.stackToNormal())

    The result looks like (the column is the queried variable)
           region   财政支出
    330100    杭州市  78628
    330200    宁波市  82844
    330300    温州市  38779
    330400    嘉兴市  26070
    '''
    # Constructor.
    def __init__(self,data=None):
        self.ad = AdminData()
        self._data = data

        self.tags = self._type()
        self.ndim = {'year':len(self.tags['year']),'variable':len(self.tags['variable']),'region':len(self.tags['region'])}

    # Layout conversion.
    def stackToNormal(self):
        '''Convert the stacked data to a layout chosen from its dimensions.

        * one variable and one region: a ``Series`` of values keyed by year;
        * a single year: a frame indexed by acode with a ``region`` column
          and one column per variable;
        * a single variable: a frame indexed by acode with a ``region``
          column and one column per year;
        * otherwise: a ``pd.Panel`` with one region|variable frame per year.
        '''
        # Time series.
        if (self.ndim['variable'] == 1) and (self.ndim['region'] == 1):
            _,first = next(iter(self._data.groupby(['acode','variable'],sort=True)))
            return pd.Series(dict(zip(self.year,first['value'])))

        # Cross-section (region | variable).
        if self.ndim['year'] == 1:
            _,g1 = next(iter(self._data.groupby('year', sort=True)))
            return self._wide_frame(g1,'variable')

        # Cross-section (region | time).
        if self.ndim['variable'] == 1:
            return self._wide_frame(self._data,'year')

        # Panel data.
        year = []
        pdata = []
        for y,g1 in self._data.groupby('year', sort=True):
            year.append(str(y))
            pdata.append(self._wide_frame(g1,'variable'))
        # NOTE(review): ``pd.Panel`` is not available in recent pandas
        # releases; this branch needs a pandas version that still ships it.
        return pd.Panel(dict(zip(year,pdata)))

    def _wide_frame(self,frame,by):
        '''Pivot stacked rows into an acode-indexed frame with one column per value of ``by``.'''
        wide = None
        for name,data in frame.groupby(by, sort=True):
            column = pd.DataFrame({name:data['value'].values},index=data['acode'].values)
            if wide is None:
                wide = column
            else:
                wide = pd.merge(wide,column,left_index=True,right_index=True,how='outer')
        wide.insert(0, 'region', [self.ad.get_by_acode(item)[0]['region'] for item in wide.index])
        return wide

    # Helper that derives the structure of the data.
    def _type(self)->dict:
        '''Set ``self.year``, ``self.acode``, ``self.region`` and ``self.variable`` and return them as tags.'''
        # Grouping by a plain column name (not a one-element list) keeps the
        # group keys scalar on every pandas version, so years become '2012'
        # rather than '(2012,)'.
        g = self._data.groupby('year', sort=True)
        self.year = [str(name) for name, group in g]

        g = self._data.groupby('acode', sort=True)
        self.acode = [name for name, group in g]
        self.region = [self.ad.get_by_acode(item)[0]['region'] for item in self.acode]

        g = self._data.groupby('variable', sort=True)
        self.variable = [name for name, group in g]

        return {'year':self.year,'region':self.region,'variable':self.variable}
Esempio n. 8
0
    def __init__(self,data=None):
        """Store ``data``, create the ``AdminData`` helper and derive the
        tags and dimension sizes of the data.

        ``self.ndim`` maps ``'year'``, ``'variable'`` and ``'region'`` to the
        length of the matching entry of ``self.tags``.
        """
        self.ad = AdminData()
        self._data = data

        tags = self._type()
        self.tags = tags
        # Size of each dimension, in the order year, variable, region.
        self.ndim = {axis: len(tags[axis]) for axis in ('year','variable','region')}
Esempio n. 9
0
from lib.database.class_Database import Database
from werkzeug.datastructures import ImmutableMultiDict
import json

year = [*range(1990, 2015)]
print(year)

# Demo built on the CEIC data.
# 1. Load the CEIC data and collect its distinct administrative codes.
db = Database()
con = db.connect("regionDB", "CEIC")
ceic_region_code = con.find().distinct("acode")
ceic_region_code = sorted(ceic_region_code)
print(len(ceic_region_code))

# 2. Look every code up in the administrative division data.
# NOTE(review): AdminData is not imported in the visible part of this file.
admin_data = AdminData()
regions = [admin_data.get_by_acode(acode=acode)[0] for acode in ceic_region_code]

# 3. Build the region rows: [acode, parent label, region name].
region_list = []
for region in regions:
    if not (region["adminlevel"] < 3):
        # Label the region with the name of its parent record.
        parent = admin_data.database.collection.find_one({"_id": region["parent"]})
        parent = "/".join(("中国", parent["region"] + "属下"))
    else:
        # Levels below 3 hang directly under the country.
        parent = u"中国"
    region_list.append([region["acode"], parent, region["region"]])
print(region_list)
# json.dump(region_list,fp=open('e:/gitwork/application/testweb/region_ceic.txt', 'w'))