Esempio n. 1
0
    def store_data_to_db(self, data_collection=None, label_collection=None):
        """Store the data and labels of every Stata object in the database.

        For each year key in ``self._stata_object`` three kinds of documents
        are written:

        * one document per data record (tagged with ``year``) into
          ``data_collection``;
        * one ``"value labels"`` document into ``label_collection``;
        * one ``"variable labels"`` document into ``label_collection``.

        :param data_collection: object exposing ``insert_one`` that receives
            the data records; when None, the ``cgssdata`` collection of the
            ``surveydata`` database is opened.
        :param label_collection: object exposing ``insert_one`` that receives
            the label documents; when None, the ``cgsslabel`` collection of
            the ``surveydata`` database is opened.
        :return: self
        """
        if data_collection is None:
            data_collection = MonCollection(
                database=MonDatabase(mongodb=MongoDB(),
                                     database_name='surveydata'),
                collection_name='cgssdata').collection

        if label_collection is None:
            label_collection = MonCollection(
                database=MonDatabase(mongodb=MongoDB(),
                                     database_name='surveydata'),
                collection_name='cgsslabel').collection

        for year in self._stata_object:
            stata_file = self._stata_object[year]

            # Data records: tag each row with its year before inserting it.
            for record in stata_file.read().to_dict("records"):
                record["year"] = year
                print(record)
                data_collection.insert_one(record)

            # Value labels: the inner keys are converted to strings
            # (presumably because document keys must be strings in MongoDB).
            value_label_doc = {
                key: {str(code): text for code, text in mapping.items()}
                for key, mapping in stata_file.value_labels.items()
            }
            value_label_doc["year"] = year
            value_label_doc["type"] = "value labels"
            print(value_label_doc)
            label_collection.insert_one(value_label_doc)

            # Variable labels: work on a copy so that neither the extra
            # "year"/"type" keys nor anything the collection adds to the
            # inserted document leaks back into the Stata object.
            variable_label_doc = dict(stata_file.variable_labels)
            variable_label_doc["year"] = year
            variable_label_doc["type"] = "variable labels"
            print(variable_label_doc)
            label_collection.insert_one(variable_label_doc)

        return self
    def __init__(self):
        """Initialise the interface to the China city statistics database.

        Connects to MongoDB at ``localhost:27017`` and keeps the
        ``.collection`` handle of the ``citystatistics`` collection in the
        ``regiondata`` database on ``self.conn``.
        """
        client = MongoDB(conn_str='localhost:27017')
        city_statistics = MonCollection(client,
                                        database='regiondata',
                                        collection_name='citystatistics')
        self.conn = city_statistics.collection
    def __init__(self):
        """Open the college collections and prepare the HTTP request headers.

        Both collections live in the ``webdata`` database of the MongoDB
        server at ``localhost:27017``.
        """
        client = MongoDB(conn_str='localhost:27017')

        def open_webdata(name):
            # Return the ``.collection`` handle of one ``webdata`` collection.
            wrapper = MonCollection(client,
                                    database='webdata',
                                    collection_name=name)
            return wrapper.collection

        self._college_info = open_webdata('college_info')
        self._college_intro = open_webdata('college_introduction')

        # Browser-like User-Agent stored for later requests.
        user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'
        self._headers = {'User-Agent': user_agent}
Esempio n. 4
0
    def __init__(self, data_collection=None, label_collection=None):
        """Initialise the database connections.

        :param data_collection: collection used for the data records; when
            None, the ``cgssdata`` collection of the ``surveydata`` database
            on ``localhost:27017`` is opened.
        :param label_collection: collection used for the label documents;
            when None, the ``cgsslabel`` collection of the ``surveydata``
            database on ``localhost:27017`` is opened.
        """
        if data_collection is not None:
            self._data_collection = data_collection
        else:
            survey_db = MonDatabase(
                mongodb=MongoDB(conn_str='localhost:27017'),
                database_name='surveydata')
            self._data_collection = MonCollection(
                database=survey_db, collection_name='cgssdata').collection

        if label_collection is not None:
            self._label_collection = label_collection
        else:
            survey_db = MonDatabase(
                mongodb=MongoDB(conn_str='localhost:27017'),
                database_name='surveydata')
            self._label_collection = MonCollection(
                database=survey_db, collection_name='cgsslabel').collection
 def __init__(self):
     """Open every MongoDB collection this object works with.

     All handles come from the server at ``localhost:27017``: three
     collections in the ``cache`` database and two in ``webdata``.
     """
     client = MongoDB(conn_str='localhost:27017')

     def open_collection(database, name):
         # Return the ``.collection`` handle for ``database``/``name``.
         wrapper = MonCollection(client,
                                 database=database,
                                 collection_name=name)
         return wrapper.collection

     self._web_conn = open_collection('cache', 'gaokaoweb')
     self._data_web_conn = open_collection('cache', 'gaokaodataweb')
     self._university_web_conn = open_collection('cache',
                                                 'gaokaouniversityweb')
     self._data_conn = open_collection('webdata', 'gaokao_entrancescore')
     self._copy_data_web_conn = open_collection('webdata',
                                                'gaokaouniversityweb')
Esempio n. 6
0
    def store_label_to_db(self, label_collection=None):
        """Store the variable-to-value-label association in the database.

        For every year key in ``self._stata_label_object`` one document is
        inserted that maps each entry of the ``name`` column to the matching
        entry of the ``vallab`` column, plus ``year`` and ``type`` fields.

        :param label_collection: object exposing ``insert_one``; when None,
            the ``cgsslabel`` collection of the ``surveydata`` database is
            opened.
        :return: self
        """
        if label_collection is None:
            survey_db = MonDatabase(mongodb=MongoDB(),
                                    database_name='surveydata')
            label_collection = MonCollection(
                database=survey_db, collection_name='cgsslabel').collection

        for year in self._stata_label_object:
            label_frame = self._stata_label_object[year].read()
            variable_names = label_frame.loc[:, "name"]
            value_label_names = label_frame.loc[:, "vallab"]

            records = {
                variable: value_label
                for variable, value_label in zip(variable_names,
                                                 value_label_names)
            }
            records.update({"year": year, "type": "variable value lables"})
            print(records)
            label_collection.insert_one(records)

        return self
# coding = UTF-8

import os
import pickle
import numpy as np
import pandas as pd
from lib.base.database.class_mongodb import MongoDB, MonDatabase, MonCollection

# Connection to the remote MongoDB server and the handle for the
# 'cross_holding_data' collection of the 'enterprise' database.
mongo = MongoDB(
    conn_str='mongodb://*****:*****@dds-bp162bb74b8184e41658-pub.mongodb.rds.aliyuncs.com:3717')
mdb = MonDatabase(mongodb=mongo, database_name='enterprise')
mcon = MonCollection(mongo, mdb, 'cross_holding_data')

# Location of the Excel workbook with the cross-holding main table.
PROJECT_DATA_PATH = r'E:\datahouse\projectdata\shareholder'
file_path = os.path.join(PROJECT_DATA_PATH, 'cross_holding_main_table.xls')

# First pass: read the sheet once just to learn its column names.
cross_holding_data_table = pd.read_excel(file_path)
# NOTE(review): `vars` shadows the builtin of the same name; the name is kept
# because code outside this excerpt may refer to it.
vars = list(cross_holding_data_table.columns)

# Second pass: read the sheet again with every column forced to `str`.
var_dtype = dict.fromkeys(vars, str)
print(var_dtype)
cross_holding_data_table = pd.read_excel(file_path, dtype=var_dtype)
# NOTE(review): the original script carried a disabled step here that
# replaced 'nan' strings with None.

# One dict per table row.
records = cross_holding_data_table.to_dict('records')
# coding = UTF-8

import re
import pysal
from pymongo import ASCENDING
import pandas as pd
from lib.base.database.class_mongodb import MongoDB, MonCollection
from application.dataworld.admindivision.class_admindivision import AdminDivision

# 1. Database connections
mongo = MongoDB(conn_str='localhost:27017')
college_info_con = MonCollection(
    mongo, database='webdata', collection_name='college_info').collection
entrance_score_con = MonCollection(
    mongo, database='webdata',
    collection_name='gaokao_entrancescore').collection

# 2. Switches for the individual processing steps
# a. Export the college entrance exam score data for each year
IS_EXPORT_RAW_EXAM_SCORE = False
# b. Export the college information data
IS_EXPORT_RAW_COLLEGE_INFO = False
# c. Build the 2011-2013 panel data
IS_MERGE_INTO_PANEL = False
# d. Merge in the college information data
IS_MERGE_COLLEGE_INFO = False
# e. Merge in the university ranking information
IS_MERGE_COLLEGE_RATE = False
# f. Merge in the province-level economic information
IS_MERGE_PROVINCE_PERGDP = False
Esempio n. 9
0
 def __init__(self):
     """Connect to the ``admindivision`` collection of the ``region`` database.

     ``self.collection`` holds the ``MonCollection`` object itself.
     """
     region_db = MonDatabase(mongodb=MongoDB(), database_name='region')
     self.collection = MonCollection(collection_name='admindivision',
                                     database=region_db)
    def __init__(self, journals_webs=None):
        """Remember the journal websites and open the two paper collections.

        :param journals_webs: value stored unchanged on
            ``self._journal_websites``.
        """
        self._journal_websites = journals_webs

        def papers_collection(name):
            # Each handle gets its own MongoDB() instance.
            return MonCollection(mongodb=MongoDB(),
                                 database='papers',
                                 collection_name=name)

        self._pre_conn = papers_collection('econpaperwebsites')
        self._paper_conn = papers_collection('econpapers')
 def __init__(self, database='papers', collection='econpapers'):
     """Start with no literature loaded and open the paper collection.

     :param database: name of the database to use.
     :param collection: name of the collection inside ``database``.
     """
     self.literatures = None

     client = MongoDB()
     self._paper_conn = MonCollection(
         mongodb=client, database=database, collection_name=collection)
            ],
                                  type=1)
            abstract = item.get('abstract')
            if abstract is not None:
                abstract = escape_latex(abstract)
                doc.document.append(abstract)
        #doc.document.generate_tex(r'E:\github\latexdoc\latexdoc\template\academicjournal\wlscirep\plutopaper.tex')
        doc.document.generate_pdf(
            r'D:\github\pluto\lib\base\pylatex\template\output\{}'.format(
                file_name))


if __name__ == '__main__':
    # Script entry: list every journal found in the 'econpapers' collection
    # and load the Econometrica records from 2012 onwards into the report.
    report = LiteratureReport()

    conn = MonCollection(
        mongodb=MongoDB(), database='papers', collection_name='econpapers')
    journals = conn.collection.find().distinct('journal')

    for journal in journals:
        print(journal)
        if journal != 'Econometrica':
            continue

        recent_papers = {'journal': journal, 'year': {'$gte': 2012}}
        sort_order = [('journal', ASCENDING), ('year', DESCENDING)]
        report.load_record_from_db(query=recent_papers, sort=sort_order)
Esempio n. 13
0
 def __init__(self, database='proxy', collection_name='proxys'):
     """Open the collection used by this object.

     :param database: name of the database to use.
     :param collection_name: name of the collection inside ``database``.
     """
     client = MongoDB()
     self._conn = MonCollection(
         mongodb=client, database=database, collection_name=collection_name)