Beispiel #1
0
def main():
    """Check which companies listed in ``conf_name`` exist in ``table_name``.

    Reads the file line by line, strips line endings and surrounding spaces,
    and looks each company name up by its ``company`` field. Every hit and
    miss is logged, followed by a three-line summary (total, found, missing).
    """
    source_db = MongDb(
        mongo_db_conf['host'],
        mongo_db_conf['port'],
        mongo_db_conf['db'],
        mongo_db_conf['username'],
        mongo_db_conf['password'],
        log=log)

    found = 0
    missing = 0
    total = 0  # stays 0 when the file has no lines
    with open(conf_name) as company_file:
        for total, raw_line in enumerate(company_file, 1):
            company = raw_line.strip("\n").strip("\r").strip(" ")
            if source_db.find_one(table_name, {'company': company}) is None:
                log.error("当前企业没有抓到: {company}".format(company=company))
                missing += 1
                continue
            log.info("已抓到企业: {}".format(company))
            found += 1
        # Summary is emitted while the file is still open, as before.
        log.info("总共企业数目为: {}".format(total))
        log.info("当前已抓到的个数: {}".format(found))
        log.info("当前总共没有抓到企业数目为: {}".format(missing))
 def __init__(self, page, log):
     """Store the page and logger, then build the Mongo, request and handler members.

     :param page: page value; stored privately and written to the start-up log line.
     :param log: logger kept on the instance and passed to the Mongo wrapper.
     """
     self.__page, self.log = page, log
     self.log.info("获得 {} 页之后的数据...".format(page))

     mongo_args = (
         LocalMongoConfig.HOST,
         LocalMongoConfig.PORT,
         LocalMongoConfig.DB,
         LocalMongoConfig.USER,
         LocalMongoConfig.PASSWD,
     )
     self.mongo = MongDb(*mongo_args, log=self.log)
     self.table = "douban"
     # Built after the Mongo handle and table name, matching the original order.
     self.request = self.__init_reqeust()
     self.douban_handler = DouBanInfoHandler()
Beispiel #3
0
    def __init__(self, secrite_key, token, user_id, log):
        """Store the credentials and logger, then build the request and Mongo members.

        :param secrite_key: secret key value kept on the instance.
        :param token: token value kept on the instance.
        :param user_id: user id value kept on the instance.
        :param log: logger kept on the instance and passed to the Mongo wrapper.
        """
        self.log = log
        self.secrite_key, self.user_id, self.token = secrite_key, user_id, token
        # The request helper runs after the credentials are stored and before
        # the Mongo handle exists, matching the original order.
        self.request = self.__init_reqeust()

        mongo_args = (
            LocalMongoConfig.HOST,
            LocalMongoConfig.PORT,
            LocalMongoConfig.DB,
            LocalMongoConfig.USER,
            LocalMongoConfig.PASSWD,
        )
        self.cp_mongo = MongDb(*mongo_args, log=self.log)
        self.cp_table = "yizhou_cp"
Beispiel #4
0
def search_task():
    """Log companies from ``data_list`` that are missing or lack shareholder data.

    For every company name in ``data_list`` (defined outside this function),
    one document is looked up in the ``enterprise_data_gov`` collection by its
    ``company`` field. A company with no document is reported via
    ``log.error``; a company whose document has no
    ``shareholder_information`` key is reported via ``log.warn``.

    NOTE(review): the loop body ends on a ``continue`` and does nothing with
    documents that pass both checks — it looks truncated; confirm against the
    full file.
    """
    log = Gsxtlogger('hunan.log').get_logger()
    # NOTE(review): connection settings are hard-coded inside the function.
    mongo_db_conf = {
        'host': '172.16.215.16',
        'port': 40042,
        'db': 'app_data',
        'username': '******',
        'password': '******'
    }

    # Storage table for the search list
    source_db = MongDb(mongo_db_conf['host'], mongo_db_conf['port'], mongo_db_conf['db'],
                       mongo_db_conf['username'],
                       mongo_db_conf['password'], log=log)

    for company in data_list:
        item = source_db.find_one('enterprise_data_gov', {'company': company})
        if item is None:
            # No document stored for this company name.
            log.error(company)
            continue

        if 'shareholder_information' not in item:
            # Document exists but carries no shareholder section.
            log.warn(company)
            continue
Beispiel #5
0
    '重庆市': 'chongqing',
    '陕西省': 'shanxi',
    '总局': 'gsxt'
}

# Connection settings for the `company_data` database.
# NOTE(review): the '******' username/password values look like redacted
# placeholders — confirm where the real values come from.
mongo_db_company_data = {
    'host': '172.16.215.2',
    'port': 40042,
    'db': 'company_data',
    'username': '******',
    'password': '******'
}

# Project Mongo wrapper built from the settings above; `log` is defined
# outside this span.
source_db = MongDb(mongo_db_company_data['host'],
                   mongo_db_company_data['port'],
                   mongo_db_company_data['db'],
                   mongo_db_company_data['username'],
                   mongo_db_company_data['password'],
                   log=log)

# Raw pymongo handle to the `schedule_data` database on the same host/port.
# NOTE(review): credentials are hard-coded in source; consider loading them
# from configuration instead.
# NOTE(review): Database.authenticate() no longer exists in PyMongo 4 —
# confirm which driver version this script is pinned to.
db_query = pymongo.MongoClient('172.16.215.2', 40042)['schedule_data']
db_query.authenticate('work', 'haizhi')

# Raw pymongo handle to the `app_data` database on a second host.
db_query_app_data = pymongo.MongoClient('172.16.215.16', 40042)['app_data']
db_query_app_data.authenticate('work', 'haizhi')


#
def main():
    try:
        count = 0
        log.info('开始读取数据...')
    'port': 40042,
    'db': 'company_data',
    'username': '******',
    'password': '******'
}

# Connection settings for the target `company_data` database.
# NOTE(review): the '******' values look like redacted placeholders — confirm.
mongo_db_target = {
    'host': "103.36.136.211",
    'port': 40042,
    'db': 'company_data',
    "username": '******',
    "password": '******',
}

# Module-level logger writing to copy_data_to_beihai.log.
log = Gsxtlogger('copy_data_to_beihai.log').get_logger()
# Source handle; `mongo_db_source` is defined above this span.
source_db = MongDb(mongo_db_source['host'], mongo_db_source['port'], mongo_db_source['db'],
                   mongo_db_source['username'], mongo_db_source['password'], log=log)

# Target handle built from `mongo_db_target`.
target_db = MongDb(mongo_db_target['host'], mongo_db_target['port'], mongo_db_target['db'],
                   mongo_db_target['username'], mongo_db_target['password'], log=log)


def main():
    """Collect every document of ``offline_all_list`` with ``crawl_online`` reset to 0.

    Iterates ``source_db.traverse('offline_all_list')``, sets
    ``item['crawl_online'] = 0`` on each document and appends it to a local
    list.

    NOTE(review): ``result_list`` and ``count`` are never consumed in the
    visible code — the function looks truncated; confirm against the full file.
    """
    collection_table = 'offline_all_list'

    log.info("开始导入数据..")
    result_list = []
    count = 0  # not updated anywhere in the visible portion
    for item in source_db.traverse(collection_table):
        # Mark the record as not crawled online before collecting it.
        item['crawl_online'] = 0

        result_list.append(item)
Beispiel #7
0
from logger import Gsxtlogger

# Module-level logger writing to find_in_gsxt.log.
log = Gsxtlogger('find_in_gsxt.log').get_logger()

# Connection settings for the `app_data` database.
# NOTE(review): the '******' values look like redacted placeholders — confirm.
db_conf = {
    'host': '172.16.215.16',
    'port': 40042,
    'db': 'app_data',
    'username': '******',
    'password': '******',
}

# Project Mongo wrapper built from `db_conf`.
source_db = MongDb(db_conf['host'],
                   db_conf['port'],
                   db_conf['db'],
                   db_conf['username'],
                   db_conf['password'],
                   log=log)


def classify():
    with open("company_invalid.txt", "w") as invalid_file:
        with open("company_valid.txt", "w") as valid_file:
            with open("company_list.txt") as p_file:
                for line in p_file:
                    company = line.strip("\r").strip("\n").strip()
                    # if source_db.find_one("enterprise_data_gov", {"company": company}) is None:
                    #     log.warn("当前企业不存在: {}".format(company))
                    # else:
                    #     log.info("找到企业信息: {}".format(company))
                    if len(company) <= 15 or len(company) > 90:
    'db': 'crawl_data',
    'username': '******',
    'password': '******'
}

# Connection settings for the `app_data_test` database.
# NOTE(review): the '******' values look like redacted placeholders — confirm.
mongo_db_gov = {
    'host': '172.16.215.16',
    'port': 40042,
    'db': 'app_data_test',
    'username': '******',
    'password': '******'
}

# Handle built from `mongo_db_crawl_data`, which is defined above this span;
# `log` is also defined outside this span.
crawl_data_db = MongDb(mongo_db_crawl_data['host'],
                       mongo_db_crawl_data['port'],
                       mongo_db_crawl_data['db'],
                       mongo_db_crawl_data['username'],
                       mongo_db_crawl_data['password'],
                       log=log)

# Handle built from `mongo_db_gov`.
gov_db = MongDb(mongo_db_gov['host'],
                mongo_db_gov['port'],
                mongo_db_gov['db'],
                mongo_db_gov['username'],
                mongo_db_gov['password'],
                log=log)

# while True:
#     data = {'province': 'yunnan', 'company_name': '国网'}
#     producer.produce(json.dumps(data))
#     log.info(count)
#     # time.sleep()
Beispiel #9
0
    'password': '******'
}

# Connection settings for the `app_data` database.
# NOTE(review): the '******' values look like redacted placeholders — confirm.
app_data_conf = {
    'host': '172.16.215.16',
    'port': 40042,
    'db': 'app_data',
    'username': '******',
    'password': '******'
}

# Module-level logger writing to count_gansu.log.
log = Gsxtlogger('count_gansu.log').get_logger()

# Handle built from `company_data_conf`, which is defined above this span.
company_data_db = MongDb(company_data_conf['host'],
                         company_data_conf['port'],
                         company_data_conf['db'],
                         company_data_conf['username'],
                         company_data_conf['password'],
                         log=log)

# Handle built from `app_data_conf`.
app_data_db = MongDb(app_data_conf['host'],
                     app_data_conf['port'],
                     app_data_conf['db'],
                     app_data_conf['username'],
                     app_data_conf['password'],
                     log=log)


def get_now_time():
    """Return the current local time as a ``YYYY-MM-DD HH:MM:SS`` string."""
    import datetime as _datetime

    current = _datetime.datetime.now()
    # datetime's format protocol delegates to strftime for a non-empty spec.
    return "{:%Y-%m-%d %H:%M:%S}".format(current)
Beispiel #10
0
    "db": "crawl_data_new",
    "username": "******",
    "password": "******",
}

# Connection settings for the `crawl_data` database (the "old" web page store,
# going by the variable name).
# NOTE(review): the '******' values look like redacted placeholders — confirm.
mongo_db_webpage_old = {
    "host": "172.16.215.2",
    "port": 40042,
    "db": "crawl_data",
    "username": "******",
    "password": "******",
}

# Module-level logger writing to find_equity_field.log.
log = Gsxtlogger('find_equity_field.log').get_logger()

# Handle built from `mongo_db_webpage_new`, which is defined above this span.
target_db_new = MongDb(mongo_db_webpage_new['host'], mongo_db_webpage_new['port'], mongo_db_webpage_new['db'],
                       mongo_db_webpage_new['username'], mongo_db_webpage_new['password'], log=log)

# Handle built from `mongo_db_webpage_old`.
target_db_old = MongDb(mongo_db_webpage_old['host'], mongo_db_webpage_old['port'], mongo_db_webpage_old['db'],
                       mongo_db_webpage_old['username'], mongo_db_webpage_old['password'], log=log)

# Mail account and recipient list; presumably passed to send_email() — verify
# against the caller.
# NOTE(review): values look like redacted placeholders; avoid keeping real
# mail credentials in source.
mail_from_addr = '*****@*****.**'
mail_password = '******'
mail_to_addrs = ['*****@*****.**']


def send_email(from_addr, password, to_addrs, subject, msg, smtp_host="smtp.weibangong.com", smtp_port=465):
    """Log in to the SMTP server and fill in the headers of ``msg``.

    :param from_addr: sender address; also used as the SMTP login name.
    :param password: SMTP login password.
    :param to_addrs: one recipient address, or a list/tuple of addresses.
    :param subject: subject text, stored as a UTF-8 encoded ``Header``.
    :param msg: message object that accepts header assignment by key
        (presumably an ``email`` message — TODO confirm).
    :param smtp_host: SMTP server host name.
    :param smtp_port: SMTP server port.

    NOTE(review): nothing visible here sends the message or closes the
    connection — confirm that the rest of the function does so.
    NOTE(review): port 465 is conventionally implicit TLS (``SMTP_SSL``);
    confirm a plain ``SMTP`` connection works against this server.
    """
    email_client = SMTP(smtp_host, smtp_port)
    email_client.login(from_addr, password)
    msg['Subject'] = Header(subject, 'utf-8')
    msg['From'] = from_addr
    # str() on a list writes its repr ("['a@b.c']") into the header, which is
    # not a valid address list; join multiple recipients with ", " instead.
    if isinstance(to_addrs, (list, tuple)):
        msg['To'] = ', '.join(to_addrs)
    else:
        msg['To'] = str(to_addrs)
# Connection settings for the source `company_data` database.
# NOTE(review): the '******' values look like redacted placeholders — confirm.
mongo_db_source = {
    'host': '172.16.215.2',
    'port': 40042,
    'db': 'company_data',
    'username': '******',
    'password': '******'
}

# Module-level logger object and the logger obtained from it
# (insert_company.log).
global_logger = Gsxtlogger('insert_company.log')
global_log = global_logger.get_logger()

# Storage table for the search list
source_db = MongDb(mongo_db_source['host'],
                   mongo_db_source['port'],
                   mongo_db_source['db'],
                   mongo_db_source['username'],
                   mongo_db_source['password'],
                   log=global_log)

# Beanstalk client built from the host/port in `beanstalk_consumer_conf`.
beanstalk_consumer_conf = {'host': 'cs0.sz-internal.haizhi.com', 'port': 11400}
beanstalk = PyBeanstalk(beanstalk_consumer_conf['host'],
                        beanstalk_consumer_conf['port'])


def main(search_name, province, unified_social_credit_code, param):
    item = {
        "_id":
        "9c9d8f8b848514f240f54a40b0a0c6f02622b3d87d54d353e525ca58d9dbe312",
        "province": province,
        "crawl_online": 0,
        "error_times": 0,
    length = len(sys.argv)
    if length > 2:
        search_list = re.findall('config/(.*?)\.conf', sys.argv[1])
        if len(search_list) > 0:
            log_name = search_list[0] + '_' + sys.argv[2] + '.log'

    return log_name


# Module-level logger; its file name comes from get_log_name().
global_logger = Gsxtlogger(get_log_name())
global_log = global_logger.get_logger()

# Old web page database (`mongo_db_target` is defined outside this span)
target_db = MongDb(mongo_db_target['host'],
                   mongo_db_target['port'],
                   mongo_db_target['db'],
                   mongo_db_target['username'],
                   mongo_db_target['password'],
                   log=global_log)

# New web page database (`mongo_db_target_new` is defined outside this span)
target_db_new = MongDb(mongo_db_target_new['host'],
                       mongo_db_target_new['port'],
                       mongo_db_target_new['db'],
                       mongo_db_target_new['username'],
                       mongo_db_target_new['password'],
                       log=global_log)

# 搜索列表存储表
source_db = MongDb(mongo_db_source['host'],
                   mongo_db_source['port'],
                   mongo_db_source['db'],