예제 #1
0
    def __init__(self, region_query=None):
        """Bind the collection that queries are run against.

        :param region_query: object to use as ``self.collection``. When it is
            ``None``, the ``admincode`` collection of the ``region`` database
            is opened through the project's MongoDB wrappers and its
            ``.collection`` attribute is stored instead.
        """
        if region_query is None:
            # No collection supplied: fall back to the default one.
            mongo = MongoDB()
            mdb = MonDatabase(mongodb=mongo, database_name='region')
            collection = MonCollection(database=mdb, collection_name='admincode')
            self.collection = collection.collection
        else:
            self.collection = region_query
        # The former trailing ``self.collection = None`` was dropped: it ran
        # unconditionally and discarded the collection selected above.
예제 #2
0
    def search_from_dbase(variables=None, query_dict=None, match='exact'):
        """Match variables against records of the ``storedvariable`` collection.

        Records returned by ``find(query_dict)`` are turned into an
        ``origin -> variable`` mapping. The origins are handed to
        ``VariableMatcher`` as the candidate source, and the
        ``matched_variable`` column of the matcher's result is then replaced
        by the mapped ``variable`` value.

        :param variables: variables to look up, passed through to the matcher
        :param query_dict: filter passed to the collection's ``find``
        :param match: ``'exact'`` uses ``search_for_same_variable``; any other
            value uses ``search_for_similar_variable`` and additionally keeps
            the matched origin in the ``matched_middel_variable`` column
        :return: the matcher's result table with the columns updated in place
        """
        stored = MonCollection(
            database=MonDatabase(mongodb=MongoDB(), database_name='region'),
            collection_name='storedvariable')

        origin_to_variable = {}
        for record in stored.find(query_dict):
            origin_to_variable[record['origin']] = record['variable']

        exact = match == 'exact'
        if exact:
            matcher = VariableMatcher.search_for_same_variable
        else:
            matcher = VariableMatcher.search_for_similar_variable
        pd_result = matcher(variables=variables,
                            source=origin_to_variable.keys())

        for row in pd_result.index:
            origin_name = pd_result.loc[row, 'matched_variable']
            if not exact:
                # Keep the intermediate (origin) name before it is replaced.
                pd_result.loc[row, 'matched_middel_variable'] = origin_name
            pd_result.loc[row, 'matched_variable'] = origin_to_variable.get(
                origin_name)

        return pd_result
예제 #3
0
        return ref_regions_dict

    @property
    def matched_region(self):
        """Return the value stored in ``self._result``."""
        return self._result


if __name__ == '__main__':
    # Demo run: match region names read from an Excel sheet against the
    # admincode records of the same year.
    pop_year = '2010'
    pop_region_file_2010 = r'E:\data\popcensus\origin\var_temp.xls'
    raw_region_2010 = Excel(pop_region_file_2010).read()
    # Drop rows whose first cell is blank and strip all whitespace from the
    # rest. Raw strings keep ``\s`` a regex escape instead of an invalid
    # string escape sequence.
    to_be_matched = [re.sub(r'\s+', '', item[0])
                     for item in raw_region_2010
                     if re.match(r'^\s*$', item[0]) is None]
    pd_to_be_matched = pd.DataFrame(to_be_matched, columns=['region'])
    pd_to_be_matched['rid'] = range(pd_to_be_matched.shape[0])

    collection = MonCollection(
        database=MonDatabase(mongodb=MongoDB(), database_name='region'),
        collection_name='admincode')
    # Query with ``pop_year`` rather than a second hard-coded '2010' so the
    # year only has to be changed in one place.
    found = collection.collection.find(
        filter={'year': pop_year},
        projection={'acode': True, 'region': True, '_id': True},
        sort=[('acode', 1)])

    pd_to_be_compared = pd.DataFrame(list(found))
    pd_to_be_compared['cid'] = range(pd_to_be_compared.shape[0])
    #pd_to_be_compared['_id'] = pd_to_be_compared['_id'].apply(str)

    print(pd_to_be_matched, pd_to_be_compared)
    algo = RegionMatchingOrderAlgorithm(pd_to_be_matched, pd_to_be_compared)
    # First, find reliable matches to serve as anchors
    algo.find_anchor()
    # Then perform the strict, order-based matching
    algo.exactly_matching_from_region_set()
    print(algo.matched_region)
예제 #4
0
# coding = UTF-8

from libs.imexport.class_mongodb import MongoDB, MonDatabase, MonCollection

# 0. Connect to the database
collection_variable = MonCollection(database=MonDatabase(
    mongodb=MongoDB(), database_name='variable'),
                                    collection_name='referencevariable')

# 1. Parameter settings
CEIC_VARIABLE = False
CHINASTAT_VARIABLE = True

# Defaults so both names are bound even when every switch above is off;
# otherwise the isinstance() check that follows raises NameError.
refer_variables = None
source = None

# 2. Import CEIC variables
if CEIC_VARIABLE:
    ceic_collection = MonCollection(database=MonDatabase(
        mongodb=MongoDB(), database_name='region'),
                                    collection_name='ceic')
    refer_variables = ceic_collection.collection.find().distinct('variable')
    source = 'CEIC'

# 3. Import China Statistical Yearbook variables
# NOTE: when both switches are on, this section overrides the CEIC result.
if CHINASTAT_VARIABLE:
    Chinastat_collection = MonCollection(database=MonDatabase(
        mongodb=MongoDB(), database_name='region'),
                                         collection_name='provincestat')
    refer_variables = Chinastat_collection.collection.find().distinct(
        'variable')
    source = '中国统计年鉴'

if isinstance(refer_variables, list):
예제 #5
0
 def __init__(self):
     """Open the ``admincode`` collection of the ``region`` database."""
     region_db = MonDatabase(mongodb=MongoDB(), database_name='region')
     self.collection = MonCollection(collection_name='admincode',
                                     database=region_db)
예제 #6
0
# User-level query settings; None / empty means nothing is preset here.
USER_VARIABLES = None
USER_PERIOD = None
USER_REGION = None
USER_FILTER = dict()
# Projection: drop `_id`, keep the listed fields.
USER_PROJECTION = {'_id': 0, 'variable': 1, 'value': 1, 'acode': 1, 'year': 1}
# Sort by acode, then by year, both ascending.
USER_SORT = [('acode', 1), ('year', 1)]

# Name of the axis kept fixed when smoothing the panel
USER_FIXED_INDEX = 'acode'

# Minimum dataset count
USER_MIN_DATASET_NUMBER = 50

# 1. to query in the mongodb
# 1.1 Connect to the database
user_database = MonDatabase(mongodb=MongoDB(),
                            database_name=DATABASES.get(DATABASE_CHOICE))
# COLLECTIONS maps a choice to a callable that is invoked with the database.
# NOTE(review): the name is spelled `user_colllection` (three l's); left as is
# because code outside this section may reference it.
user_colllection = COLLECTIONS.get(COLLECTION_CHOICE)(database=user_database)

# 1.2 Print basic information about the collection
if COLLECTION_INFO:
    user_colllection.info()

# Round counter for the loop below.
# NOTE(review): this shadows the builtin `round` for the rest of the module.
round = 1
while True:
    # 1.3 查询数据,返回结果
    # 打印开始信息
    print(''.join(['=' * 40, 'Round ', str(round), '=' * 40]))

    # 设定变量variables
    if VARIABLE_RANDOM_CHOICE > 0:
예제 #7
0
 def __init__(self):
     """Open the ``popcensus`` collection of the ``region`` database."""
     region_db = MonDatabase(mongodb=MongoDB(), database_name='region')
     self.collection = MonCollection(collection_name='popcensus',
                                     database=region_db)
예제 #8
0
# coding = UTF-8

import re
from libs.database.class_mondbprovincestat import MonDBProvinceStat
from libs.imexport.class_mongodb import MongoDB, MonDatabase, MonCollection

# Connect to the province statistics data in the `region` database.
mongo = MongoDB()
mdb = MonDatabase(mongodb=mongo, database_name='region')
prostat = MonDBProvinceStat(database=mdb)

# 1. Fix the problem of overlapping variable names
variables_origin = set(prostat.variables)
# Raw string keeps ``\s`` a regex escape rather than an invalid string escape.
variables_no_space = {re.sub(r'\s+', '', var) for var in prostat.variables}

# Print every variable with a 1-based position and its length so names that
# differ only by whitespace are easy to spot.
for i, item in enumerate(sorted(prostat.variables), start=1):
    print(i, ': ', item, ' -- ', len(item))
예제 #9
0
        :param fill_value: 详见pandas.pivot_table()函数参数说明
        :return: 返回转换后的宽格式表格
        :rtype: pandas.DataFrame
        """
        result = pd.pivot_table(data=dataframe,
                                values=values,
                                index=index,
                                columns=columns,
                                dropna=dropna,
                                fill_value=fill_value)

        return result


if __name__ == '__main__':
    mcollection = MonCollection(database=MonDatabase(mongodb=MongoDB(),
                                                     database_name='region'),
                                collection_name='provincestat')
    cursor = mcollection.find(
        {
            'variable': {
                '$in': ['人均地区生产总值', '私人控股企业法人单位数', '城镇居民消费', '城镇单位就业人员平均工资']
            }
        },
        projection={
            '_id': 0,
            'variable': 1,
            'value': 1,
            'province': 1,
            'acode': 1,
            'year': 1
예제 #10
0
 def __init__(self, database=None, collection_name='ceic'):
     """Initialise the parent class with a database and collection name.

     :param database: database wrapper passed to the parent initialiser.
         When ``None``, the ``region`` database is opened on a fresh
         ``MongoDB()`` connection.
     :param collection_name: collection name passed to the parent
         initialiser; defaults to ``'ceic'``.
     """
     # Build the default lazily: a call in the signature is evaluated once at
     # definition time, which connects on import and shares one object
     # between all instances.
     if database is None:
         database = MonDatabase(mongodb=MongoDB(), database_name='region')
     super().__init__(database=database, collection_name=collection_name)
예제 #11
0
# coding = UTF-8

from libs.imexport.class_mongodb import MongoDB, MonDatabase
from pymongo import MongoClient
from sshtunnel import SSHTunnelForwarder

# Disabled alternative, kept as a bare string literal so it never runs:
# it starts an SSHTunnelForwarder, builds a connection string against the
# forwarded local port, creates a MongoDB wrapper from it and stops the tunnel.
'''
server = SSHTunnelForwarder(
    ('123.207.185.126',22),
    ssh_username="******",
    ssh_password="******",
    remote_bind_address=('10.66.131.25', 27017)
)

server.start()

print(server.local_bind_port)  # show assigned local port
# work with `SECRET SERVICE` through `server.local_bind_port`.
conn_str = 'mongodb://*****:*****@127.0.0.1:{}/admin'.format(server.local_bind_port)
print(conn_str)

mongo = MongoDB(conn_str=conn_str)
#print(mongo.database_names)

server.stop()'''

# Active path: connect directly with a connection string and print the
# wrapper's `database_names`.
# NOTE(review): the credentials are embedded in the connection string (masked
# here) — consider loading them from configuration instead.
mongo = MongoDB(conn_str='mongodb://*****:*****@123.207.185.126:27017/')
print(mongo.database_names)
예제 #12
0
 def setUp(self):
     """Open the ``cities`` collection of the ``region`` database for the tests."""
     connection = MongoDB(
         conn_str='mongodb://*****:*****@139.196.189.191:3717/')
     region_db = MonDatabase(database_name='region', mongodb=connection)
     self.mcollection = MonCollection(collection_name='cities',
                                      database=region_db)