def __init__(self, region_query=None):
    """Bind the instance to the collection handle it will query.

    :param region_query: optional, already prepared collection/query object.
        When ``None`` (the default) the ``admincode`` collection of the
        ``region`` database is opened through the project's MongoDB wrappers
        and its underlying ``.collection`` attribute is used instead.
    """
    if region_query is None:
        mongo = MongoDB()
        mdb = MonDatabase(mongodb=mongo, database_name='region')
        collection = MonCollection(database=mdb, collection_name='admincode')
        self.collection = collection.collection
    else:
        self.collection = region_query
    # NOTE: the previous version ended with ``self.collection = None``, which
    # threw away the handle selected above (a caller-supplied ``region_query``
    # was always lost). That statement has been removed.
def search_from_dbase(variables=None, query_dict=None, match='exact'):
    """Match ``variables`` against the names stored in ``storedvariable``.

    The ``storedvariable`` collection of the ``region`` database is queried
    with ``query_dict``; every returned document contributes one
    ``origin -> variable`` entry to a lookup table. The ``origin`` names are
    then handed to ``VariableMatcher`` as the candidate source.

    :param variables: variables to look up (forwarded to ``VariableMatcher``).
    :param query_dict: filter passed unchanged to the collection's ``find``.
    :param match: ``'exact'`` uses ``search_for_same_variable``; any other
        value uses ``search_for_similar_variable`` and additionally keeps the
        matched origin name in the ``'matched_middel_variable'`` column.
    :return: the matcher's result (presumably a pandas DataFrame -- it is
        indexed through ``.index``/``.loc``) whose ``'matched_variable'``
        column holds the mapped variable, or ``None`` when the origin name is
        not in the lookup table.
    """
    region_db = MonDatabase(mongodb=MongoDB(), database_name='region')
    stored = MonCollection(database=region_db,
                           collection_name='storedvariable')
    found_dict = {doc['origin']: doc['variable']
                  for doc in stored.find(query_dict)}

    exact = match == 'exact'
    matcher = (VariableMatcher.search_for_same_variable if exact
               else VariableMatcher.search_for_similar_variable)
    pd_result = matcher(variables=variables, source=found_dict.keys())

    for row in pd_result.index:
        origin_name = pd_result.loc[row, 'matched_variable']
        if not exact:
            # Keep the intermediate (origin) name before it is overwritten.
            pd_result.loc[row, 'matched_middel_variable'] = origin_name
        pd_result.loc[row, 'matched_variable'] = found_dict.get(origin_name)
    return pd_result
return ref_regions_dict @property def matched_region(self): return self._result if __name__ == '__main__': pop_year = '2010' pop_region_file_2010 = r'E:\data\popcensus\origin\var_temp.xls' raw_region_2010 = Excel(pop_region_file_2010).read() to_be_matched = [re.sub('\s+','',item[0]) for item in raw_region_2010 if re.match('^\s*$',item[0]) is None] pd_to_be_matched = pd.DataFrame(to_be_matched,columns=['region']) pd_to_be_matched['rid'] = range(pd_to_be_matched.shape[0]) collection = MonCollection(database=MonDatabase(mongodb=MongoDB(), database_name='region'), collection_name='admincode') found = collection.collection.find(filter={'year':'2010'}, projection={'acode':True,'region':True,'_id':True}, sort=[('acode',1)]) pd_to_be_compared = pd.DataFrame(list(found)) pd_to_be_compared['cid'] = range(pd_to_be_compared.shape[0]) #pd_to_be_compared['_id'] = pd_to_be_compared['_id'].apply(str) print(pd_to_be_matched,pd_to_be_compared) algo = RegionMatchingOrderAlgorithm(pd_to_be_matched,pd_to_be_compared) # 首先是寻找可靠的匹配作为锚点 algo.find_anchor() # 其次进行顺序的严格匹配 algo.exactly_matching_from_region_set() print(algo.matched_region)
# coding = UTF-8 from libs.imexport.class_mongodb import MongoDB, MonDatabase, MonCollection # 0. 连接数据库 collection_variable = MonCollection(database=MonDatabase( mongodb=MongoDB(), database_name='variable'), collection_name='referencevariable') # 1. 参数设置 CEIC_VARIABLE = False CHINASTAT_VARIABLE = True # 2. 导入CEIC变量 if CEIC_VARIABLE: ceic_collection = MonCollection(database=MonDatabase( mongodb=MongoDB(), database_name='region'), collection_name='ceic') refer_variables = ceic_collection.collection.find().distinct('variable') source = 'CEIC' # 3. 导入中国统计年鉴变量 if CHINASTAT_VARIABLE: Chinastat_collection = MonCollection(database=MonDatabase( mongodb=MongoDB(), database_name='region'), collection_name='provincestat') refer_variables = Chinastat_collection.collection.find().distinct( 'variable') source = '中国统计年鉴' if isinstance(refer_variables, list):
def __init__(self):
    """Connect to the ``admincode`` collection of the ``region`` database.

    The ``MonCollection`` wrapper is stored on ``self.collection``.
    """
    region_db = MonDatabase(mongodb=MongoDB(), database_name='region')
    self.collection = MonCollection(database=region_db,
                                    collection_name='admincode')
USER_VARIABLES = None USER_PERIOD = None USER_REGION = None USER_FILTER = dict() USER_PROJECTION = {'_id': 0, 'variable': 1, 'value': 1, 'acode': 1, 'year': 1} USER_SORT = [('acode', 1), ('year', 1)] # 平滑面板时固定的轴名称 USER_FIXED_INDEX = 'acode' # 数据集最小数量 USER_MIN_DATASET_NUMBER = 50 # 1. to query in the mongodb # 1.1 连接数据库 user_database = MonDatabase(mongodb=MongoDB(), database_name=DATABASES.get(DATABASE_CHOICE)) user_colllection = COLLECTIONS.get(COLLECTION_CHOICE)(database=user_database) # 1.2 数据库基本参数打印 if COLLECTION_INFO: user_colllection.info() round = 1 while True: # 1.3 查询数据,返回结果 # 打印开始信息 print(''.join(['=' * 40, 'Round ', str(round), '=' * 40])) # 设定变量variables if VARIABLE_RANDOM_CHOICE > 0:
def __init__(self):
    """Connect to the ``popcensus`` collection of the ``region`` database.

    The ``MonCollection`` wrapper is stored on ``self.collection``.
    """
    self.collection = MonCollection(
        database=MonDatabase(mongodb=MongoDB(), database_name='region'),
        collection_name='popcensus',
    )
# coding = UTF-8
import re

from libs.database.class_mondbprovincestat import MonDBProvinceStat
from libs.imexport.class_mongodb import MongoDB, MonDatabase, MonCollection

# Open the province statistics data stored in the ``region`` database.
mongo = MongoDB()
mdb = MonDatabase(mongodb=mongo, database_name='region')
prostat = MonDBProvinceStat(database=mdb)

# 1. Fix the problem of overlapping variable names
# Two views of the variable names: as stored, and with all whitespace removed.
# (Neither set is consumed in the visible part of this script.)
variables_origin = set(prostat.variables)
# Raw string: '\s' in a plain literal is an invalid escape sequence.
variables_no_space = {re.sub(r'\s+', '', var) for var in prostat.variables}

# Print every variable name with a 1-based position and its length.
for i, item in enumerate(sorted(prostat.variables), start=1):
    print(i, ': ', item, ' -- ', len(item))
:param fill_value: 详见pandas.pivot_table()函数参数说明 :return: 返回转换后的宽格式表格 :rtype: pandas.DataFrame """ result = pd.pivot_table(data=dataframe, values=values, index=index, columns=columns, dropna=dropna, fill_value=fill_value) return result if __name__ == '__main__': mcollection = MonCollection(database=MonDatabase(mongodb=MongoDB(), database_name='region'), collection_name='provincestat') cursor = mcollection.find( { 'variable': { '$in': ['人均地区生产总值', '私人控股企业法人单位数', '城镇居民消费', '城镇单位就业人员平均工资'] } }, projection={ '_id': 0, 'variable': 1, 'value': 1, 'province': 1, 'acode': 1, 'year': 1
def __init__(self, database=None, collection_name='ceic'):
    """Initialise the collection wrapper via the parent class.

    :param database: database wrapper to use. When ``None`` (the default) a
        ``MonDatabase`` for the ``region`` database is created on a fresh
        ``MongoDB()`` connection.
    :param collection_name: name of the collection, ``'ceic'`` by default.
    """
    if database is None:
        # Build the default lazily. A call in the signature is evaluated once,
        # when the ``def`` statement runs, so it would open the connection at
        # import time and share one object across every instance.
        database = MonDatabase(mongodb=MongoDB(), database_name='region')
    super().__init__(database=database, collection_name=collection_name)
# coding = UTF-8
from libs.imexport.class_mongodb import MongoDB, MonDatabase
from pymongo import MongoClient
from sshtunnel import SSHTunnelForwarder

# Disabled alternative, kept for reference: connect through an SSH tunnel.
# (Previously held in a bare triple-quoted string, which is never executed.)
#
# server = SSHTunnelForwarder(
#     ('123.207.185.126',22),
#     ssh_username="******",
#     ssh_password="******",
#     remote_bind_address=('10.66.131.25', 27017)
# )
# server.start()
# print(server.local_bind_port)  # show assigned local port
# # work with `SECRET SERVICE` through `server.local_bind_port`.
# conn_str = 'mongodb://*****:*****@127.0.0.1:{}/admin'.format(server.local_bind_port)
# print(conn_str)
# mongo = MongoDB(conn_str=conn_str)
# #print(mongo.database_names)
# server.stop()

# Direct connection: print the database names reported by the wrapper.
mongo = MongoDB(
    conn_str='mongodb://*****:*****@123.207.185.126:27017/',
)
print(mongo.database_names)
def setUp(self):
    """Open the ``cities`` collection of the ``region`` database.

    The ``MonCollection`` wrapper is stored on ``self.mcollection``.
    """
    region_db = MonDatabase(
        mongodb=MongoDB(
            conn_str='mongodb://*****:*****@139.196.189.191:3717/'),
        database_name='region',
    )
    self.mcollection = MonCollection(database=region_db,
                                     collection_name='cities')