def __init__(self,data=None):
    """Initialise the formatter with ``data`` and index its dimensions.

    ``data`` is forwarded unchanged to ``Format.__init__``.
    """
    Format.__init__(self,data)
    # Helper object built from AdminData (defined outside this fragment).
    self.ad = AdminData()
    # NOTE(review): _type() is defined outside this fragment; presumably it
    # reads the data stored by Format.__init__, so it must run last - confirm.
    self._type()
class RegionFormat(Format):
    """Convert stacked (long-format) regional data into wide layouts.

    The methods below read the columns ``year``, ``acode``, ``region``,
    ``variable`` and ``value`` from ``self._data`` and, when present,
    ``scale``.
    NOTE(review): ``self._data`` is presumably populated by
    ``Format.__init__`` - confirm against the base class.
    """

    # Constructor
    def __init__(self, data=None):
        """Store ``data`` through ``Format.__init__`` and index its dimensions."""
        Format.__init__(self, data)
        self.ad = AdminData()
        self._type()

    # Format conversion
    def transform(self, sourcetype='stack', targettype='normal', connect='outer'):
        """Convert the data from ``sourcetype`` to ``targettype``.

        Only the stack -> normal conversion is implemented. Both names are
        tested with ``re.match``, so any string that starts with ``'stack'`` /
        ``'normal'`` qualifies.

        :param sourcetype: layout of the stored data.
        :param targettype: requested layout.
        :param connect: ``how`` argument used when the per-variable columns
            are merged on the region code (``'outer'``, ``'inner'``, ...).
        :return: the dict built by ``_stackToNormal``, or ``None`` when the
            requested conversion is not supported.
        """
        if (re.match('stack', sourcetype) is not None) and (re.match('normal', targettype) is not None):
            return self._stackToNormal(connect=connect)
        # Unsupported conversions keep returning None, as before.
        return None

    def _stackToNormal(self, connect):
        """Pivot the stacked data into a time series, cross-section or panel.

        * one variable and one region (single-scale data only): returns
          ``{'tags', 'data', 'sdata'}`` with a year-indexed Series/DataFrame;
        * a single year: returns ``{'tags', 'data'}`` with one row per region
          code and one column per variable (``variable_scale`` when several
          scales are present);
        * otherwise: returns ``{'data', 'pdata', 'balanceddata'}`` (plus
          ``'tags'`` when exactly one scale is present) built from a
          ``pd.Panel`` keyed by year.

        :param connect: merge strategy applied in every branch. Previously it
            was honoured only by the multi-scale cross-section and silently
            replaced by ``'outer'`` elsewhere.
        """
        scale = self.ndim.get('scale')
        # More than one scale: columns must be told apart by (variable, scale).
        multi_scale = (scale is not None) and scale > 1
        # A scalar key keeps the group name a plain value on every pandas
        # version; a single-element list yields 1-tuples on recent releases.
        keys = ['variable', 'scale'] if multi_scale else 'variable'

        # Time series: one variable observed for one region.
        if (not multi_scale) and (self.ndim['variable'] == 1) and (self.ndim['region'] == 1):
            tags = {'variable': self.variable[0],
                    'region': self.ad.getByAcode(self.acode[0])[0]['region']}
            first_group = next(iter(self._data.groupby(['acode', 'variable'], sort=True)))[1]
            series = pd.Series(dict(zip(self.year, first_group['value'])))
            frame = pd.DataFrame({tags['variable']: series})
            if scale is not None:
                tags['scale'] = self.scale[0]
            return {'tags': tags, 'data': frame, 'sdata': series}

        # Cross-section (region | variable) for a single year.
        if self.ndim['year'] == 1:
            only_year = next(iter(self._data.groupby('year', sort=True)))[1]
            wide = self._pivotByRegion(only_year, keys, connect)
            # Was ``{'year', self.year[0]}`` (a set) in the multi-scale case.
            tags = {'year': self.year[0]}
            if (scale is not None) and (not multi_scale):
                tags['scale'] = self.scale[0]
            return {'tags': tags, 'data': wide}

        # Panel data: one wide table per year.
        result = {}
        if (scale is not None) and (not multi_scale):
            result['tags'] = {'scale': self.scale}
        result.update(self._panelResult(keys, connect))
        return result

    def _pivotByRegion(self, frame, keys, connect):
        """Turn one year's stacked rows into a table indexed by region code."""
        region_by_acode = dict(zip(frame['acode'], frame['region']))
        wide = None
        for name, rows in frame.groupby(keys, sort=True):
            # Multi-key groups are labelled "variable_scale".
            label = '_'.join(str(part) for part in name) if isinstance(name, tuple) else name
            column = pd.DataFrame({label: rows['value'].values}, index=rows['acode'].values)
            if wide is None:
                wide = column
            else:
                wide = pd.merge(wide, column, left_index=True, right_index=True, how=connect)
        if wide is None:
            # No groups at all: return an empty table instead of failing.
            wide = pd.DataFrame()
        wide.insert(0, 'region', [region_by_acode[code] for code in wide.index])
        return wide

    def _panelResult(self, keys, connect):
        """Build the panel plus its flattened full and balanced versions."""
        years = []
        frames = []
        for year, rows in self._data.groupby('year', sort=True):
            years.append(str(year))
            frames.append(self._pivotByRegion(rows, keys, connect))
        # NOTE(review): pd.Panel was removed in pandas 0.25, so this path needs
        # an older pandas. It is kept because callers receive the Panel object
        # under 'pdata'.
        panel = pd.Panel(dict(zip(years, frames)))
        return {'data': self._flattenPanel(panel),
                'pdata': panel,
                'balanceddata': self._flattenPanel(panel.dropna(axis=1))}

    def _flattenPanel(self, panel):
        """Reorder the panel axes and flatten it to a DataFrame, keeping all rows."""
        swapped = panel.swapaxes('items', 'minor')
        swapped = swapped.swapaxes('major', 'minor')
        return swapped.to_frame(False)

    # Helper: record the structure of the data
    def _type(self) -> None:
        """Record the distinct years, region codes, variables and scales.

        Sets ``self.year`` (as strings), ``self.acode``, ``self.numOfRegion``,
        ``self.variable``, ``self.ndim`` and, when a ``scale`` column exists,
        ``self.scale``. Nothing is returned.
        """
        frame = self._data
        self.year = [str(name) for name, _ in frame.groupby('year', sort=True)]
        acode_groups = frame.groupby('acode', sort=True)
        self.acode = [name for name, _ in acode_groups]
        self.numOfRegion = len(acode_groups.groups)
        self.variable = [name for name, _ in frame.groupby('variable', sort=True)]
        self.ndim = {'year': len(self.year),
                     'variable': len(self.variable),
                     'region': self.numOfRegion}
        if 'scale' in frame.columns:
            self.scale = [name for name, _ in frame.groupby('scale', sort=True)]
            self.ndim['scale'] = len(self.scale)
def __init__(self):
    """Initialise the base ``Database``, connect, and create the ``AdminData`` helper."""
    Database.__init__(self)
    # An earlier target, 'CityStatistics', was left commented out in favour of
    # 'CEIC'. The arguments are presumably (database name, collection name) -
    # TODO confirm against the definition of _connect.
    self._connect('regionDB','CEIC')
    self.ad = AdminData()
class CityStatDatabase(Database):
    """Query interface over the regional statistics store.

    The constructor connects with ``_connect('regionDB', 'CEIC')``.
    """

    # Constructor
    def __init__(self):
        """Initialise the base class, connect, and create the ``AdminData`` helper."""
        Database.__init__(self)
        self._connect('regionDB', 'CEIC')
        self.ad = AdminData()

    # Query
    def find(self, conds, toStandardForm=True):
        """Run a query described by ``conds`` and return the matching rows.

        Recognised control keys (removed before the filter is built):
        ``projection``, ``sorts`` and ``year``. A ``region`` key selects
        regions either by six-digit code or by names resolved through
        ``self.ad``. Every other key becomes a filter; list values are
        wrapped in ``$in``.

        :param conds: query description. It is no longer modified in place.
        :param toStandardForm: when true, return ``RegionFormat(...).transform()``;
            otherwise return the raw DataFrame.
        :return: the transformed result or the raw DataFrame. An empty result
            yields ``None`` in standard form (previously a crash inside
            ``RegionFormat``) and the empty DataFrame otherwise.
        """
        # Shallow copy: popping the control keys must not strip the caller's dict.
        conds = dict(conds)

        # Projection. Always popped, so an explicit None cannot leak into the filter.
        projection = conds.pop('projection', None)
        if projection is None:
            projection = {'region': 1, 'year': 1, 'value': 1, 'acode': 1,
                          '_id': 0, 'variable': 1, 'scale': 1}

        # Sort order
        sorts = conds.pop('sorts', None)
        if sorts is None:
            sorts = [('year', ASCENDING), ('acode', ASCENDING)]

        # Period: default to every year available for the requested variables.
        period = conds.pop('year', None)
        if period is None:
            # Only hit the database for the full variable list when needed.
            variables = conds['variable'] if 'variable' in conds else self.variables
            period = self.period(variables)

        conditions = {}
        for key, value in conds.items():
            if re.match('region', key) is not None:
                continue
            conditions[key] = {'$in': value} if isinstance(value, list) else value

        # Region handling is the important part.
        if 'region' in conds:
            regions = conds['region']
            # A scalar year used to be iterated directly (TypeError for ints,
            # character-by-character for strings).
            years = period if isinstance(period, (list, tuple, set)) else [period]
            # Six-digit strings are region codes. Anything else is resolved by
            # name, and non-string entries no longer reach re.match.
            first = regions[0] if regions else None
            by_code = isinstance(first, str) and re.match('^[0-9]{6}$', first) is not None
            records = []
            for year in years:
                conditions['year'] = year
                # Region codes are looked up for the year being queried.
                self.ad.setYear(year)
                if by_code:
                    conditions['acode'] = {'$in': regions}
                else:
                    conditions['acode'] = {'$in': [region['acode']
                                                   for item in regions
                                                   for region in self.ad[tuple(item)]]}
                records.extend(self.collection.find(conditions, projection).sort(sorts))
            mresult = pd.DataFrame(records)
        else:
            conditions['year'] = {'$in': period} if isinstance(period, list) else period
            mresult = pd.DataFrame(list(self.collection.find(conditions, projection).sort(sorts)))

        # pd.DataFrame(...) is never None, so test for emptiness instead.
        if mresult.empty:
            return None if toStandardForm else mresult

        mresult = mresult.drop_duplicates(keep='last')
        if toStandardForm:
            return RegionFormat(mresult).transform()
        return mresult

    @property
    def variables(self):
        """Distinct values of the ``variable`` field in the collection."""
        return self.collection.find().distinct('variable')

    # All periods available for the given variable(s)
    def period(self, variable):
        """Return the sorted distinct years recorded for ``variable``.

        :param variable: one variable name, or an iterable of names whose
            years are combined.
        """
        if isinstance(variable, str):
            query = {'variable': variable}
        else:
            # One $in query replaces a separate round-trip per variable.
            query = {'variable': {'$in': list(variable)}}
        return sorted(self.collection.find(query).distinct('year'))
def __init__(self):
    """Initialise the base ``Database``, connect, and create the ``AdminData`` helper."""
    Database.__init__(self)
    # The arguments are presumably (database name, collection name) -
    # TODO confirm against the definition of _connect.
    self._connect("regionDB", "CityStatistics")
    self.ad = AdminData()
class CityStatDatabase(Database):
    """Query interface over the city statistics store.

    The constructor connects with ``_connect("regionDB", "CityStatistics")``.
    """

    # Constructor
    def __init__(self):
        """Initialise the base class, connect, and create the ``AdminData`` helper."""
        Database.__init__(self)
        self._connect("regionDB", "CityStatistics")
        self.ad = AdminData()

    # Query
    def find(self, conds, toStandardForm=True):
        """Run a query described by ``conds`` and return the matching rows.

        Recognised control keys (removed before the filter is built):
        ``projection``, ``sorts`` and ``year``. A ``region`` key lists regions
        that are resolved to codes through ``self.ad``. Every other key
        becomes a filter; list values are wrapped in ``$in``.

        :param conds: query description. It is no longer modified in place.
        :param toStandardForm: when true, return ``RegionFormat(...).transform()``;
            otherwise return the raw DataFrame.
        :return: the transformed result or the raw DataFrame. An empty result
            yields ``None`` in standard form (previously a crash inside
            ``RegionFormat``) and the empty DataFrame otherwise.
        """
        # Shallow copy: popping the control keys must not strip the caller's dict.
        conds = dict(conds)

        # Projection. Always popped, so an explicit None cannot leak into the filter.
        projection = conds.pop("projection", None)
        if projection is None:
            projection = {"region": 1, "year": 1, "value": 1, "acode": 1,
                          "_id": 0, "variable": 1, "scale": 1}

        # Sort order
        sorts = conds.pop("sorts", None)
        if sorts is None:
            sorts = [("year", ASCENDING), ("acode", ASCENDING)]

        # Period: default to every year available for the requested variables.
        period = conds.pop("year", None)
        if period is None:
            # Only hit the database for the full variable list when needed.
            variables = conds["variable"] if "variable" in conds else self.variables
            period = self.period(variables)

        conditions = {}
        for key, value in conds.items():
            if re.match("region", key) is not None:
                continue
            conditions[key] = {"$in": value} if isinstance(value, list) else value

        # Region handling is the important part.
        if "region" in conds:
            regions = conds["region"]
            # A scalar year used to be iterated directly (TypeError for ints,
            # character-by-character for strings).
            years = period if isinstance(period, (list, tuple, set)) else [period]
            records = []
            for year in years:
                conditions["year"] = year
                # Region codes are looked up for the year being queried.
                self.ad.setYear(year)
                conditions["acode"] = {
                    "$in": [region["acode"] for item in regions for region in self.ad[tuple(item)]]
                }
                records.extend(self.collection.find(conditions, projection).sort(sorts))
            mresult = pd.DataFrame(records)
        else:
            conditions["year"] = {"$in": period} if isinstance(period, list) else period
            mresult = pd.DataFrame(list(self.collection.find(conditions, projection).sort(sorts)))

        # Nothing matched: RegionFormat cannot index an empty frame.
        if mresult.empty:
            return None if toStandardForm else mresult

        # ``take_last=True`` is no longer accepted by pandas; ``keep="last"``
        # is the equivalent spelling.
        mresult = mresult.drop_duplicates(keep="last")
        if toStandardForm:
            return RegionFormat(mresult).transform()
        return mresult

    @property
    def variables(self):
        """Distinct values of the ``variable`` field in the collection."""
        return self.collection.find().distinct("variable")

    # All periods available for the given variable(s)
    def period(self, variable):
        """Return the sorted distinct years recorded for ``variable``.

        :param variable: one variable name, or an iterable of names whose
            years are combined.
        """
        if isinstance(variable, str):
            query = {"variable": variable}
        else:
            # One $in query replaces a separate round-trip per variable.
            query = {"variable": {"$in": list(variable)}}
        return sorted(self.collection.find(query).distinct("year"))