Example #1
0
def new_enterprise_main():
    """
    Entry point for collecting newly added enterprise records.

    Replaces the module-level ``device_id``, ``tim``, ``sign`` and ``header``
    with the values returned by ``more_get_token()``, obtains a handle from
    ``connect_redis(0, 110)`` and passes every ``(url, city, province)``
    triple produced by ``creat_url()`` to ``handle_page``, sleeping 1.5
    seconds after each page.

    :return: None
    """
    global device_id, tim, sign, header
    fresh_credentials = more_get_token()
    device_id, tim, sign, header = fresh_credentials
    # NOTE(review): the meaning of the two connect_redis arguments is not
    # visible from here - confirm against common.utils.
    redis_conn = connect_redis(0, 110)
    for page_url, city_name, province_name in creat_url():
        handle_page(page_url, city_name, province_name, redis_conn)
        # Fixed pause between consecutive page requests.
        sleep(1.5)
Example #2
0
def handle_page(url, city, province, r):
    """
    Fetch one result page, keep yesterday's enterprises and persist them.

    The page is requested with the module-level ``header``. While the decoded
    response does not contain ``200``, the credentials are refreshed through
    ``more_get_token()``, the previous ``sign`` and ``tim`` values embedded in
    ``url`` are swapped for the new ones and the request is retried after a
    10 second pause. There is no retry limit.

    Every record whose ``StartDate`` equals ``get_yesterday()`` is tagged with
    ``City`` and ``Province``, stripped of ``ImageUrl`` and ``HitReason``,
    stored through ``r.set`` and finally handed to ``write_data`` on a
    background thread.

    The module-level ``is_break`` flag is set to True when the page has no
    records or contains a record that is not from yesterday.

    :param url: page URL to request; the retry path rewrites the ``sign`` and
        ``tim`` values inside it
    :param city: city name written to each record and used in the storage key
    :param province: province name written to each record and used in the
        storage key
    :param r: object exposing ``set(key, value)`` (presumably the Redis
        connection created by the caller - confirm)
    :return: None
    """
    global device_id, tim, sign, header, is_break
    # Same timeout as the retry request so a stalled server cannot hang the run.
    res = requests.get(url, headers=header, timeout=10)
    # SECURITY: the body used to be rewritten (true/false/null -> Python
    # literals) and passed to eval(), which executes whatever the server
    # returns and also alters those substrings inside string values. Decode it
    # as JSON instead.
    res_data = res.json()
    # NOTE(review): a substring check for '200' matches any value containing
    # those digits, not only a status code - confirm the real status field.
    while '200' not in str(res_data):
        sleep(10)
        old_sign, old_tim = sign, tim
        print('handle_page', res.text)
        print('权限不足或者accessToken失效,sign失败')
        res.close()
        device_id, tim, sign, header = more_get_token()
        # The original chain contained an argument-less ``.replace()`` here,
        # which raised TypeError on every retry.
        url = url.replace(old_sign, sign).replace(old_tim, tim)
        res = requests.get(url, headers=header, timeout=10)
        res_data = res.json()
    res.close()
    try:
        qiye_data = res_data.get('result').get('Result')
    except AttributeError as e:
        # 'result' is missing/None or the payload is not a mapping.
        print(e)
        return
    if not qiye_data:
        is_break = True
        return
    # Computed once so the filter and the output file name use the same date.
    yesterday = get_yesterday()
    write_list = []
    for qiye in qiye_data:
        if qiye.get('StartDate') != yesterday:
            is_break = True
            continue
        qiye['City'] = city
        qiye['Province'] = province
        # pop() tolerates records that lack these optional fields.
        qiye.pop('ImageUrl', None)
        qiye.pop('HitReason', None)
        write_list.append(qiye)
        r.set(province + ":" + city + ':' + qiye.get('KeyNo'), str(qiye))
    threading.Thread(target=write_data,
                     args=(yesterday + "-data.txt", write_list)).start()
Example #3
0
def handle_page(url, city, province, r):
    """
    Fetch one result page and store yesterday's enterprises through ``r``.

    The page is requested with the module-level ``header``. While the decoded
    response does not contain ``200``, the credentials are refreshed through
    ``more_get_token()`` and the same URL is requested again after a 10
    second pause. There is no retry limit.

    Every record whose ``StartDate`` equals ``get_yesterday()`` is tagged with
    ``City`` and ``Province`` and stored with ``r.set`` under
    ``province + city + ':' + KeyNo``.

    :param url: page URL to request
    :param city: city name written to each record and used in the storage key
    :param province: province name written to each record and used in the
        storage key
    :param r: object exposing ``set(key, value)`` (presumably the Redis
        connection created by the caller - confirm)
    :return: None
    """
    global device_id, tim, sign, header
    # Same timeout as the retry request so a stalled server cannot hang the run.
    res = requests.get(url, headers=header, timeout=10)
    print(city, province)
    # SECURITY: the body used to be rewritten (true/false/null -> Python
    # literals) and passed to eval(), which executes whatever the server
    # returns and also alters those substrings inside string values. Decode it
    # as JSON instead.
    res_data = res.json()
    # NOTE(review): a substring check for '200' matches any value containing
    # those digits, not only a status code - confirm the real status field.
    while '200' not in str(res_data):
        sleep(10)
        print('handle_page', res.text)
        print('权限不足或者accessToken失效,sign失败')
        res.close()
        device_id, tim, sign, header = more_get_token()
        res = requests.get(url, headers=header, timeout=10)
        res_data = res.json()
    res.close()
    try:
        qiye_data = res_data.get('result').get('Result')
    except AttributeError as e:
        # 'result' is missing/None or the payload is not a mapping.
        print(e)
        return
    if not qiye_data:
        # Nothing to iterate; a None 'Result' previously raised TypeError.
        return
    # Computed once instead of once per record.
    yesterday = get_yesterday()
    for qiye in qiye_data:
        if qiye.get('StartDate') != yesterday:
            continue
        qiye['City'] = city
        qiye['Province'] = province
        r.set(province + city + ':' + qiye.get('KeyNo'), str(qiye))
Example #4
0
@Time    : 2019-06-18 17:57
@Author  : Xincheng.Zhao
@Desc    : 获取每天新增企业
@Email   : [email protected]
@File    : getnewdata.py
"""
import threading
from multiprocessing import Process
from time import sleep

import requests
from apscheduler.schedulers.blocking import BlockingScheduler

from common.utils import connect_redis, get_yesterday, read_data, more_get_token, write_data

# Request credentials shared through ``global`` statements by the functions in
# this module. They are fetched once when the module is executed and are
# reassigned later by new_enterprise_main().
device_id, tim, sign, header = more_get_token()

# Module-level flag, initially False.
# NOTE(review): presumably set to True by the page handler to signal that
# crawling can stop - confirm against the code that reads it.
is_break = False


def new_enterprise_main():
    """
    Collect newly added enterprise data.

    Reassigns the module-level ``device_id``, ``tim``, ``sign`` and ``header``
    from ``more_get_token()``, gets a handle from ``connect_redis(0, 110)``
    and runs ``handle_page`` for each ``(url, city, province)`` triple yielded
    by ``creat_url()``, sleeping 1.5 seconds after every page.

    :return: None
    """
    global device_id, tim, sign, header
    token_bundle = more_get_token()
    device_id, tim, sign, header = token_bundle
    # NOTE(review): the meaning of the two connect_redis arguments is not
    # visible from here - confirm against common.utils.
    store = connect_redis(0, 110)
    for target_url, target_city, target_province in creat_url():
        handle_page(target_url, target_city, target_province, store)
        # Fixed pause between consecutive page requests.
        sleep(1.5)