Ejemplo n.º 1
0
def get_content(phone):
    """Search Baidu for ``phone`` and return its address, tag and result titles.

    The result page is parsed with XPath. On success the returned dict is
    also passed to ``mysql_obj.save_to_db``.

    Args:
        phone: Phone number, interpolated into the search URL as-is.

    Returns:
        On success, a dict with keys ``source``, ``phone``, ``status``
        (``'success'``), ``addr``, ``tag``, ``search_list`` and ``timestamp``.
        On any exception (network error, unexpected page layout, DB error),
        a dict with ``source``, ``phone``, ``status`` (``'failed'``) and the
        time under both ``timestamp`` and the legacy key ``t``.
    """
    try:
        # NOTE(review): no timeout is passed, so a stalled connection blocks
        # the caller indefinitely -- consider adding one.
        res = requests.get(f'https://www.baidu.com/s?wd={phone}',
                           headers=headers)
        reshtml = etree.HTML(res.text)

        # Address and tag: the second text node of the result card holds
        # "<addr>  <tag>" separated by two spaces. An IndexError here (card
        # missing) is handled by the outer ``except`` as a failed lookup.
        content = reshtml.xpath(
            ".//div[@class='result-op c-container']//div"
            "[@class='c-span21 c-span-last']//span//text()")[1]
        con_list = content.split("  ")
        if len(con_list) > 1:
            # Drop every whitespace character inside the address.
            addr = ''.join(con_list[0].split())
            tag = con_list[1]
        else:
            addr = ''
            tag = con_list[0]

        # Titles of the organic search results. The xpath call sits outside
        # the inner ``try`` so ``search`` is always bound when the fallback
        # branch reads it.
        search = reshtml.xpath(
            "//div[@class='result c-container ']/h3/a/text()")
        try:
            info_list = search[:config.sougou_search_num]
        except (AttributeError, TypeError):
            # Limit missing from config or not usable as a slice bound:
            # keep every title.
            info_list = search

        # Package the result.
        msg = {
            'source': NAME,
            'phone': phone,
            'status': 'success',
            'addr': addr,
            'tag': tag,
            'search_list': info_list,
            'timestamp': get_current_timestamp()
        }

        # Persist to the database as required.
        mysql_obj.save_to_db(msg)

        return msg
    except Exception:
        now = get_current_timestamp()
        return {
            'source': NAME,
            'phone': phone,
            'status': 'failed',
            # Same key as the success payload, so callers can read it
            # uniformly.
            'timestamp': now,
            # Legacy key kept for callers that already read it.
            't': now
        }
Ejemplo n.º 2
0
def index():
    """Test endpoint: reply with a fixed success payload and HTTP 200."""
    payload = {
        'code': '1',
        'msg': 'success',
        'timestamp': get_current_timestamp(),
    }
    return jsonify(payload), 200
Ejemplo n.º 3
0
def query(phone):
    """Query entry point for a phone number.

    Returns the cached result when one exists. Otherwise looks up the
    address and the tag, builds the result dict, and caches it only when the
    lookup succeeded.
    """
    # Try the cache first; only query when nothing is cached.
    key = NAME + phone
    cached = cache.get(key)
    if cached:
        return cached

    # Look up the number's address and its tag.
    addr = get_address(phone)
    tag = get_tag_in_web(phone)

    # A lookup succeeds only when both values are present and the tag is
    # not one of the error sentinels.
    succeeded = bool(
        addr and tag and (tag not in ['程序错误', '无可用cookie']))
    result = {
        'source': NAME,
        'phone': phone,
        'status': 'success' if succeeded else 'failed',
        'addr': addr,
        'tag': tag,
        'timestamp': get_current_timestamp()
    }

    # Cache successful lookups only.
    if succeeded:
        cache.set(key, result, config.RESULT_EXPIRE)

    return result
Ejemplo n.º 4
0
def internal_error(error):
    """Handle a 500 error.

    Args:
        error: The object passed to the handler. It may be a plain exception
            with no ``description`` attribute, so the attribute is read
            defensively instead of raising a second error inside the handler.

    Returns:
        A ``(response, 200)`` tuple. The JSON body carries the error code
        ``'500'``, the error's description (or ``'internal error'`` when it
        is missing or empty) and the current timestamp.
    """
    msg = getattr(error, 'description', None)
    return (jsonify({
        'code': '500',
        'msg': msg or 'internal error',
        'timestamp': get_current_timestamp(),
    }), 200)
Ejemplo n.º 5
0
def not_allowed(error):
    """Handle a 403 error.

    Replies with HTTP 200 and a JSON body carrying code ``'403'``, the
    error's description (``'unauthorized'`` when empty) and the timestamp.
    """
    description = error.description
    body = {
        'code': '403',
        'msg': description or 'unauthorized',
        'timestamp': get_current_timestamp(),
    }
    return jsonify(body), 200
Ejemplo n.º 6
0
def not_found(error):
    """Handle a 404 error.

    Replies with HTTP 200 and a JSON body carrying code ``'404'``, the
    error's description (``'not found'`` when empty) and the timestamp.
    """
    description = error.description
    body = {
        'code': '404',
        'msg': description or 'not found',
        'timestamp': get_current_timestamp(),
    }
    return jsonify(body), 200
Ejemplo n.º 7
0
async def query():
    """Worker loop: pop numbers off ``PHONES`` and collect tagged results.

    For each number, awaits ``get_tag_in_web`` and appends a result dict to
    ``RESS`` when the cleaned tag contains ``':'``. The loop has no exit
    condition of its own; it ends only when a call inside it raises
    (presumably ``PHONES.pop()`` once the container is empty).
    """
    while True:
        number = PHONES.pop()
        # Look up the tag, the search result list and the data source.
        raw_tag, hits, origin = await get_tag_in_web(number)
        cleaned_tag = raw_tag.replace(':0', ':').replace("'", '')
        record = {
            'source': origin,
            'phone': number,
            'tag': cleaned_tag,
            'search_list': hits,
            'timestamp': get_current_timestamp()
        }

        # A number that carries a tag usually has ':' in it; skip the rest.
        if ':' not in cleaned_tag:
            continue
        RESS.append(record)
Ejemplo n.º 8
0
def get_cookies():
    """Keep the cookie pool in the cache filled, using headless Chrome.

    Starts a headless Chrome driver, first with the chromedriver at
    ``/usr/local/bin/chromedriver`` and then with
    ``config.CHROME_DRIVER_PATH``. It then loops forever: while the cache
    holds fewer than ``config.COOKIES_POOL_SIZE`` cookies, it opens the base
    URL, submits a search, stores the session cookies as a name-to-value
    dict, and clears them from the browser.

    The loop ends only when an exception escapes it. The browser is always
    shut down on the way out, including on ``KeyboardInterrupt``.
    """
    chrome_options = Options()
    chrome_options.add_argument("--headless")

    base_url = config.CHROME_BASE_URL
    try:
        driver = webdriver.Chrome(
            executable_path=(r'/usr/local/bin/chromedriver'),
            chrome_options=chrome_options)
    except Exception:
        # Default driver location unusable: fall back to the configured path.
        driver = webdriver.Chrome(executable_path=(config.CHROME_DRIVER_PATH),
                                  chrome_options=chrome_options)

    print('start .....')
    try:
        while 1:
            num = cache.cookie_count()
            print(get_current_timestamp(), ':redis have', num, 'cookie ...')
            if num < config.COOKIES_POOL_SIZE:
                driver.get(base_url + "/")
                try:
                    time.sleep(1)
                    driver.find_element_by_id("query").send_keys("112")
                    driver.find_element_by_id('stb').click()
                except Exception:
                    # Page scripts not loaded yet: reopen the page.
                    continue
                # Flatten the driver's cookie records into {name: value}.
                cookies = {item['name']: item['value']
                           for item in driver.get_cookies()}
                cache.put(cookies)
                print('save one cookie')
                driver.delete_all_cookies()
            time.sleep(0.6)
    except Exception as e:
        print(e)
    finally:
        # Close the browser however the loop ended, so no headless Chrome
        # process is left behind.
        driver.quit()