def search_from_dbase(variables=None,query_dict=None,match='exact'):
    """Match variable names against the ``storedvariable`` collection.

    The documents found for ``query_dict`` are turned into an
    ``origin -> variable`` lookup. ``variables`` are matched against the
    ``origin`` names through ``VariableMatcher`` and every matched origin
    name is then replaced by the stored variable name it maps to.

    :param variables: variable names handed to the matcher
    :param query_dict: filter passed to ``find`` on the collection
    :param match: ``'exact'`` selects ``search_for_same_variable``; any other
        value selects ``search_for_similar_variable``
    :return: the matcher's result with ``matched_variable`` remapped; in the
        non-exact mode the matched origin name is additionally kept in the
        ``matched_middel_variable`` column
    """
    stored = MonCollection(database=MonDatabase(mongodb=MongoDB(), database_name='region'),
                           collection_name='storedvariable')
    origin_to_variable = {doc['origin']: doc['variable'] for doc in stored.find(query_dict)}

    exact = match == 'exact'
    if exact:
        matcher = VariableMatcher.search_for_same_variable
    else:
        matcher = VariableMatcher.search_for_similar_variable
    pd_result = matcher(variables=variables, source=origin_to_variable.keys())

    for row in pd_result.index:
        origin_name = pd_result.loc[row, 'matched_variable']
        if not exact:
            # Remember which origin name was matched before it is replaced.
            pd_result.loc[row, 'matched_middel_variable'] = origin_name
        pd_result.loc[row, 'matched_variable'] = origin_to_variable.get(origin_name)

    return pd_result
Esempio n. 2
0
class TestCalculate(unittest.TestCase):
    """Connectivity check against the ``cities`` collection of the ``region`` database."""

    def setUp(self):
        # Build the client -> database -> collection chain used by the test.
        database = MonDatabase(
            mongodb=MongoDB(conn_str='mongodb://*****:*****@139.196.189.191:3717/'),
            database_name='region')
        self.mcollection = MonCollection(database=database, collection_name='cities')

    def test_connect_mongodb(self):
        # The unfiltered query is expected to yield exactly two documents.
        documents = list(self.mcollection.find())
        self.assertEqual(2, len(documents))
Esempio n. 3
0
class TestCalculate(unittest.TestCase):
    """Checks that the ``cities`` collection can be reached and queried."""

    def setUp(self):
        # Client -> database -> collection, rebuilt before every test.
        client = MongoDB(conn_str='mongodb://*****:*****@139.196.189.191:3717/')
        region_db = MonDatabase(mongodb=client, database_name='region')
        self.mcollection = MonCollection(database=region_db, collection_name='cities')

    def test_connect_mongodb(self):
        # An unfiltered find() is expected to return exactly two documents.
        found = list(self.mcollection.find())
        self.assertEqual(2, len(found))
class PopCensusDatabase:
    """Access to the population-census data stored in the ``popcensus`` collection."""

    def __init__(self):
        # Connect to the 'popcensus' collection of the 'region' database.
        database = MonDatabase(mongodb=MongoDB(), database_name='region')
        self.collection = MonCollection(database=database, collection_name='popcensus')

    @property
    def period(self):
        """Sorted distinct values of the ``year`` field."""
        years = self.collection.find().distinct('year')
        return sorted(years)

    @property
    def variables(self):
        """Sorted distinct values of the ``variable`` field."""
        names = self.collection.find().distinct('variable')
        return sorted(names)
Esempio n. 5
0
class AdminDatabase():
    """Query helper for the administrative-division codes in ``admincode``.

    The instance holds a ``MonCollection`` for the ``admincode`` collection
    of the ``region`` database in ``self.collection``.
    """
    def __init__(self):
        # Connect to the 'admincode' collection of the 'region' database.
        mongo = MongoDB()
        mdb = MonDatabase(mongodb=mongo, database_name='region')
        self.collection = MonCollection(database=mdb,
                                        collection_name='admincode')

    def find(self, **conds):
        """Query the collection and return the sorted cursor.

        Two keywords are reserved and never become part of the filter:

        :param projection: fields to return; when missing or ``None`` the
            fields ``region``, ``version``, ``adminlevel``, ``acode``,
            ``_id`` and ``parent`` are returned
        :param sorts: sort specification; when missing or ``None`` results
            are sorted by ``year`` and then ``acode``, both ascending

        Every other keyword is a filter field. A ``list`` value is turned
        into an ``$in`` condition, any other value is matched as given.

        :return: whatever ``collection.find(...).sort(...)`` returns
        """
        # Pop the reserved keywords unconditionally. Looking them up first and
        # popping only when set left an explicit ``projection=None`` or
        # ``sorts=None`` inside ``conds`` and therefore inside the filter.
        projection = conds.pop('projection', None)
        if projection is None:
            projection = {
                'region': 1,
                'version': 1,
                'adminlevel': 1,
                'acode': 1,
                '_id': 1,
                'parent': 1
            }
        sorts = conds.pop('sorts', None)
        if sorts is None:
            sorts = [('year', ASCENDING), ('acode', ASCENDING)]

        # Build the filter from the remaining keywords.
        condition = {
            key: {'$in': value} if isinstance(value, list) else value
            for key, value in conds.items()
        }

        return self.collection.find(condition, projection).sort(sorts)

    @property
    def period(self):
        """Sorted distinct values of the ``year`` field."""
        return sorted(self.find().distinct('year'))

    def version(self, year=None):
        """Return the sorted distinct ``version`` values.

        :param year: when given, only documents whose ``year`` equals
            ``str(year)`` are considered
        """
        if year is None:
            return sorted(self.find().distinct('version'))
        return sorted(self.find(year=str(year)).distinct('version'))
Esempio n. 6
0
    def __init__(self, region_query=None):
        """Choose the collection this object queries and store it in ``self.collection``.

        :param region_query: object to use as the collection; when ``None``
            the ``collection`` attribute of the ``MonCollection`` for
            ``admincode`` in the ``region`` database is used instead
        """
        if region_query is None:
            # Nothing supplied: connect to the 'admincode' collection.
            mongo = MongoDB()
            mdb = MonDatabase(mongodb=mongo, database_name='region')
            collection = MonCollection(database=mdb, collection_name='admincode')
            # Keep the wrapper's underlying ``collection`` attribute.
            self.collection = collection.collection
        else:
            self.collection = region_query
        # A trailing ``self.collection = None`` used to follow here; it
        # unconditionally discarded the collection chosen above.
Esempio n. 7
0
    def search_from_dbase(variables=None, query_dict=None, match='exact'):
        """Match variable names against the ``storedvariable`` collection.

        Documents found for ``query_dict`` provide an ``origin -> variable``
        lookup. ``variables`` are matched against the ``origin`` names by
        ``VariableMatcher``; each matched origin name is then swapped for the
        stored variable name it maps to.

        :param variables: variable names handed to the matcher
        :param query_dict: filter passed to ``find`` on the collection
        :param match: ``'exact'`` selects ``search_for_same_variable``; any
            other value selects ``search_for_similar_variable``
        :return: the matcher's result with ``matched_variable`` remapped; in
            the non-exact mode the matched origin name is also kept in the
            ``matched_middel_variable`` column
        """
        region_db = MonDatabase(mongodb=MongoDB(), database_name='region')
        stored = MonCollection(database=region_db,
                               collection_name='storedvariable')
        lookup = {
            doc['origin']: doc['variable']
            for doc in stored.find(query_dict)
        }

        exact = match == 'exact'
        if exact:
            run_matcher = VariableMatcher.search_for_same_variable
        else:
            run_matcher = VariableMatcher.search_for_similar_variable
        pd_result = run_matcher(variables=variables, source=lookup.keys())

        for row in pd_result.index:
            origin_name = pd_result.loc[row, 'matched_variable']
            if not exact:
                # Keep the matched origin name before it is overwritten.
                pd_result.loc[row, 'matched_middel_variable'] = origin_name
            pd_result.loc[row, 'matched_variable'] = lookup.get(origin_name)

        return pd_result
Esempio n. 8
0
class AdminDatabase():
    """Query helper for the administrative-division codes in ``admincode``.

    ``self.collection`` holds a ``MonCollection`` for the ``admincode``
    collection of the ``region`` database.
    """
    def __init__(self):
        # Connect to the 'admincode' collection of the 'region' database.
        mongo = MongoDB()
        mdb = MonDatabase(mongodb=mongo, database_name='region')
        self.collection = MonCollection(database=mdb, collection_name='admincode')

    def find(self,**conds):
        """Query the collection and return the sorted cursor.

        ``projection`` and ``sorts`` are reserved keywords and never end up
        in the filter:

        :param projection: fields to return; when missing or ``None`` the
            fields ``region``, ``version``, ``adminlevel``, ``acode``,
            ``_id`` and ``parent`` are returned
        :param sorts: sort specification; when missing or ``None`` results
            are sorted by ``year`` and then ``acode``, both ascending

        All remaining keywords are filter fields: a ``list`` value becomes
        an ``$in`` condition, anything else is matched as given.

        :return: whatever ``collection.find(...).sort(...)`` returns
        """
        # Always remove the reserved keywords from ``conds``; previously an
        # explicit ``projection=None`` / ``sorts=None`` was left behind and
        # leaked into the filter as a field condition.
        projection = conds.pop('projection', None)
        if projection is None:
            projection = {'region':1,'version':1,'adminlevel':1,'acode':1,'_id':1,'parent':1}
        sorts = conds.pop('sorts', None)
        if sorts is None:
            sorts = [('year',ASCENDING),('acode',ASCENDING)]

        # Build the filter from the remaining keywords.
        condition = {key: ({'$in': value} if isinstance(value, list) else value)
                     for key, value in conds.items()}

        return self.collection.find(condition,projection).sort(sorts)

    @property
    def period(self):
        """Sorted distinct values of the ``year`` field."""
        return sorted(self.find().distinct('year'))

    def version(self,year=None):
        """Return the sorted distinct ``version`` values.

        :param year: when given, only documents whose ``year`` equals
            ``str(year)`` are considered
        """
        if year is None:
            return sorted(self.find().distinct('version'))
        return sorted(self.find(year=str(year)).distinct('version'))
 def __init__(self):
     # Connect to the 'popcensus' collection of the 'region' database.
     region_db = MonDatabase(mongodb=MongoDB(), database_name='region')
     self.collection = MonCollection(database=region_db, collection_name='popcensus')
Esempio n. 10
0
        :param values: 详见pandas.pivot_table()函数参数说明
        :param index: 详见pandas.pivot_table()函数参数说明
        :param columns: 详见pandas.pivot_table()函数参数说明
        :param dropna: 详见pandas.pivot_table()函数参数说明
        :param fill_value: 详见pandas.pivot_table()函数参数说明
        :return: 返回转换后的宽格式表格
        :rtype: pandas.DataFrame
        """
        result = pd.pivot_table(data=dataframe, values=values, index=index, columns=columns,
                                dropna=dropna,fill_value=fill_value)

        return result


if __name__ == '__main__':
    # Manual check: query four indicators from 'provincestat', hand the cursor
    # to MongoDBToPandasFormat and print what the converter call returns.
    wanted_variables = ['人均地区生产总值','私人控股企业法人单位数','城镇居民消费','城镇单位就业人员平均工资']
    returned_fields = {'_id':0,'variable':1,'value':1,'province':1,'acode':1,'year':1}

    region_db = MonDatabase(mongodb=MongoDB(), database_name='region')
    mcollection = MonCollection(database=region_db, collection_name='provincestat')
    # Alternatives tried earlier: add 'year':'2010' to the filter (and drop
    # 'year' from the projection), or query the single variable
    # '人均地区生产总值' for 'acode':'110000'.
    cursor = mcollection.find({'variable': {'$in': wanted_variables}},
                              projection=returned_fields)
    mongoconverter = MongoDBToPandasFormat(cursor)

    # Test first.
    # Other layouts tried earlier: index=['year'] only, or dropna=False
    # together with balanced=True.
    result = mongoconverter(values='value', index=['acode','year'],
                            columns='variable', dropna=True)
    print(result)
    # Optional export tried earlier: result.to_excel('e:/backup/result.xlsx')
 def __init__(self):
     # Connect to the 'admindivision' collection of the 'region' database.
     region_db = MonDatabase(mongodb=MongoDB(), database_name='region')
     self.collection = MonCollection(database=region_db, collection_name='admindivision')
Esempio n. 12
0
        return ref_regions_dict

    @property
    def matched_region(self):
        """Return the value held in ``self._result``."""
        outcome = self._result
        return outcome


if __name__ == '__main__':
    # Manual run: match region names read from a spreadsheet against the
    # 'admincode' records of one year.
    pop_year = '2010'
    pop_region_file_2010 = r'E:\data\popcensus\origin\var_temp.xls'
    raw_region_2010 = Excel(pop_region_file_2010).read()
    # Take the first cell of every row, skip cells that are empty or
    # whitespace-only, and strip all whitespace from the rest. The patterns
    # are raw strings so that ``\s`` is not an invalid string escape.
    to_be_matched = [re.sub(r'\s+', '', item[0]) for item in raw_region_2010
                     if re.match(r'^\s*$', item[0]) is None]
    pd_to_be_matched = pd.DataFrame(to_be_matched, columns=['region'])
    # 'rid' numbers the rows to be matched in their original order.
    pd_to_be_matched['rid'] = range(pd_to_be_matched.shape[0])

    collection = MonCollection(database=MonDatabase(mongodb=MongoDB(), database_name='region'),
                               collection_name='admincode')
    # Filter on ``pop_year`` instead of repeating the '2010' literal.
    found = collection.collection.find(filter={'year': pop_year},
                                       projection={'acode': True, 'region': True, '_id': True},
                                       sort=[('acode', 1)])

    pd_to_be_compared = pd.DataFrame(list(found))
    # 'cid' numbers the candidate rows in the order returned by the query.
    pd_to_be_compared['cid'] = range(pd_to_be_compared.shape[0])
    # Disabled: pd_to_be_compared['_id'] = pd_to_be_compared['_id'].apply(str)

    print(pd_to_be_matched, pd_to_be_compared)
    algo = RegionMatchingOrderAlgorithm(pd_to_be_matched, pd_to_be_compared)
    # First find reliable matches to use as anchors.
    algo.find_anchor()
    # Then run the strict, order-based matching.
    algo.exactly_matching_from_region_set()
    print(algo.matched_region)
Esempio n. 13
0
# coding = UTF-8

from libs.imexport.class_mongodb import MongoDB, MonDatabase, MonCollection

# 0. Connect to the database: the 'referencevariable' collection of the
#    'variable' database.
collection_variable = MonCollection(database=MonDatabase(
    mongodb=MongoDB(), database_name='variable'),
                                    collection_name='referencevariable')

# 1. Parameter settings: flags selecting which source is loaded below.
CEIC_VARIABLE = False
CHINASTAT_VARIABLE = True

# 2. Import the CEIC variables: distinct 'variable' values of the 'ceic'
#    collection in the 'region' database.
if CEIC_VARIABLE:
    ceic_collection = MonCollection(database=MonDatabase(
        mongodb=MongoDB(), database_name='region'),
                                    collection_name='ceic')
    refer_variables = ceic_collection.collection.find().distinct('variable')
    source = 'CEIC'

# 3. Import the China Statistical Yearbook variables: distinct 'variable'
#    values of the 'provincestat' collection in the 'region' database.
# NOTE(review): in the visible code, if both flags are True this section
# overwrites ``refer_variables`` and ``source`` set by section 2; if both are
# False neither name is assigned before it is used further down.
if CHINASTAT_VARIABLE:
    Chinastat_collection = MonCollection(database=MonDatabase(
        mongodb=MongoDB(), database_name='region'),
                                         collection_name='provincestat')
    refer_variables = Chinastat_collection.collection.find().distinct(
        'variable')
    source = '中国统计年鉴'

if isinstance(refer_variables, list):
Esempio n. 14
0
 def __init__(self):
     # Connect to the 'admincode' collection of the 'region' database.
     region_db = MonDatabase(mongodb=MongoDB(), database_name='region')
     self.collection = MonCollection(database=region_db,
                                     collection_name='admincode')
Esempio n. 15
0
 def setUp(self):
     # Client -> database -> collection, rebuilt before every test.
     client = MongoDB(
         conn_str='mongodb://*****:*****@139.196.189.191:3717/')
     region_db = MonDatabase(mongodb=client, database_name='region')
     self.mcollection = MonCollection(database=region_db,
                                      collection_name='cities')
Esempio n. 16
0
        :return: 返回转换后的宽格式表格
        :rtype: pandas.DataFrame
        """
        result = pd.pivot_table(data=dataframe,
                                values=values,
                                index=index,
                                columns=columns,
                                dropna=dropna,
                                fill_value=fill_value)

        return result


if __name__ == '__main__':
    # Indicators to fetch and the fields returned for every document.
    wanted_variables = ['人均地区生产总值', '私人控股企业法人单位数', '城镇居民消费', '城镇单位就业人员平均工资']
    returned_fields = {
        '_id': 0,
        'variable': 1,
        'value': 1,
        'province': 1,
        'acode': 1,
        'year': 1,
    }

    # Query the 'provincestat' collection of the 'region' database.
    region_db = MonDatabase(mongodb=MongoDB(), database_name='region')
    mcollection = MonCollection(database=region_db,
                                collection_name='provincestat')
    cursor = mcollection.find({'variable': {'$in': wanted_variables}},
                              projection=returned_fields)
Esempio n. 17
0
# coding = UTF-8

from libs.datasheet.class_sheetanalyst import SheetAnalyst
from libs.datasheet.class_DataSheet import DataSheet
from os import path, listdir
import pandas as pd
from libs.imexport.class_mongodb import MongoDB, MonDatabase, MonCollection

# Directory holding the census variable workbooks.
file_path = r'E:\data\popcensus\origin'
print(listdir(file_path))

# Handle on the 'storedvariable' collection of the 'region' database.
mongo = MongoDB()
mdb = MonDatabase(mongodb=mongo, database_name='region')
collection_variable = MonCollection(database=mdb,
                                    collection_name='storedvariable')

# Workbooks popcensus_2000_variable1.xls ... popcensus_2000_variable8.xls.
for i in range(1, 9):
    variable_file = path.join(file_path,
                              'popcensus_2000_variable%d.xls' % i)
    # ``sheet_name`` is the read_excel keyword that selects a sheet. The
    # former ``type=1`` and ``sheet='Sheet1'`` are not read_excel parameters.
    rdata = pd.read_excel(io=variable_file, sheet_name='Sheet1')
    print(rdata)

    variable_dict = {}
    for ind in rdata.index:
        # One record per spreadsheet row: original name -> matched name.
        record = {
            'origin': rdata.loc[ind, 'origin_var'],
            'variable': rdata.loc[ind, 'matched_var'],
            'source': '中国人口普查',
            'year': '2000'
        }
Esempio n. 18
0
        return pd_result

    @staticmethod
    def fuzzy_variable_matching(variable, compared, error='auto'):
        """Fuzzy full-match ``compared`` against ``variable``.

        :param variable: text interpolated as-is into the pattern
            ``(?:variable){e<=error}``
        :param compared: string the pattern is matched against
        :param error: error bound placed in the ``{e<=...}`` constraint; the
            string ``'auto'`` selects ``max(1, int(len(variable) * 0.6))``,
            any other value (number or numeric string) is used as given
        :return: the result of ``regex.fullmatch``
        """
        # Only strings are tested for 'auto': re.match raises TypeError when
        # handed a number, which made a numeric ``error`` unusable.
        if isinstance(error, str) and re.match(r'^auto$', error) is not None:
            error = max(1, int(len(variable) * 0.6))

        # NOTE(review): ``variable`` is not escaped, so regex metacharacters
        # in it are interpreted as pattern syntax - confirm this is intended.
        return regex.fullmatch('(?:%s){e<=%s}' % (variable, str(error)),
                               compared)


if __name__ == '__main__':
    # Handles on the 'popcensus' and 'storedvariable' collections of the
    # 'region' database.
    mongo = MongoDB()
    mdb = MonDatabase(mongodb=mongo, database_name='region')
    collection_province = MonCollection(database=mdb,
                                        collection_name='popcensus')
    collection_variable = MonCollection(database=mdb,
                                        collection_name='storedvariable')
    # Disabled exploratory check, kept as comments. It used to sit in a bare
    # triple-quoted string whose '\s' was an invalid string escape.
    # var = set()
    # for v in sorted(collection_province.find().distinct('variable')):
    #     var.add(re.sub(r'\s+', '', v))
    #     print(v, len(v))
    # print(len(var))
    # print(len(collection_province.find().distinct('variable')))

    #for v in collection_province.find().distinct('variable'):
    #    collection_variable.collection.insert_one({'variable':v,'source':'中国人口普查'})

    #vars = [item['variable'] for item in collection_variable.find()]
    #x = ['总人口_合计', '总人口_男', '总人口_女', '总人口_性别比', '户籍人口', '少数民族人口比重_', '非农业户口人口比重_', '城乡人口_城镇', '城乡人口_乡村', '家庭户_户数', '家庭户_人口数', '家庭户_户规模', '家庭户_其中:一人户', '家庭户类别_一代户', '家庭户类别_二代户', '家庭户类别_三代户', '家庭户类别_四代以上户']
Esempio n. 19
0
 def setUp(self):
     # Build the client -> database -> collection chain used by the tests.
     region_db = MonDatabase(
         mongodb=MongoDB(conn_str='mongodb://*****:*****@139.196.189.191:3717/'),
         database_name='region')
     self.mcollection = MonCollection(database=region_db, collection_name='cities')