def get_content(phone):
    """Search Baidu for a phone number and extract its address and tag.

    The result page is fetched and parsed; the second text node of the
    result card is split on a single space into an address part and a tag
    part. Titles of the ordinary search results are collected as well,
    truncated to ``config.sougou_search_num`` entries when that setting is
    usable as a slice bound.

    Args:
        phone: The phone number to look up (interpolated into the query URL).

    Returns:
        On success, a dict with keys ``source``, ``phone``, ``status``
        (``'success'``), ``addr``, ``tag``, ``search_list`` and
        ``timestamp``; the dict is passed to ``mysql_obj.save_to_db`` before
        being returned.
        On any failure, a dict with ``source``, ``phone``, ``status``
        (``'failed'``) and ``timestamp``. The failure dict also keeps the
        historical ``t`` key (same value) for existing readers.
    """
    try:
        # Without a timeout a stalled connection would block forever; a
        # timeout error is reported through the generic failure path below.
        res = requests.get(f'https://www.baidu.com/s?wd={phone}',
                           headers=headers, timeout=10)
        reshtml = etree.HTML(res.text)

        # tag / addr: taken from the second text node of the result card.
        content = reshtml.xpath(
            ".//div[@class='result-op c-container']//div"
            "[@class='c-span21 c-span-last']//span//text()")[1]
        con_list = content.split(" ")
        if len(con_list) > 1:
            addr = ''.join(con_list[0].split())
            tag = con_list[1]
        else:
            addr = ''
            tag = con_list[0]

        # info_list: titles of the regular search results.
        search = reshtml.xpath(
            "//div[@class='result c-container ']/h3/a/text()")
        try:
            info_list = search[:config.sougou_search_num]
        except (AttributeError, TypeError):
            # Limit missing or not usable as a slice bound: keep all titles.
            info_list = search

        # package
        msg = {
            'source': NAME,
            'phone': phone,
            'status': 'success',
            'addr': addr,
            'tag': tag,
            'search_list': info_list,
            'timestamp': get_current_timestamp(),
        }

        # Persist the record as required.
        mysql_obj.save_to_db(msg)
        return msg
    except Exception:
        now = get_current_timestamp()
        return {
            'source': NAME,
            'phone': phone,
            'status': 'failed',
            # Same key as the success payload.
            'timestamp': now,
            # Legacy key kept so existing consumers keep working.
            't': now,
        }
def index():
    """Test endpoint: return a fixed success payload paired with status 200."""
    payload = {
        'code': '1',
        'msg': 'success',
        'timestamp': get_current_timestamp(),
    }
    return jsonify(payload), 200
def query(phone):
    """Query entry point: look up the address and tag for a phone number.

    A truthy cache entry under ``NAME + phone`` is returned as-is. Otherwise
    the address and tag are fetched, a result dict is built, and the result
    is stored in the cache (with ``config.RESULT_EXPIRE`` passed to
    ``cache.set``) only when the lookup counts as successful.

    Args:
        phone: Phone number string; concatenated with ``NAME`` for the key.

    Returns:
        The cached value, or a dict with keys ``source``, ``phone``,
        ``status`` (``'success'`` or ``'failed'``), ``addr``, ``tag`` and
        ``timestamp``.
    """
    # Serve from the cache first; only query when nothing is cached.
    key = NAME + phone
    cached = cache.get(key)
    if cached:
        return cached

    # Look up the address and the number tag.
    addr = get_address(phone)
    tag = get_tag_in_web(phone)

    # Both values must be present and the tag must not be an error marker.
    error_tags = ['程序错误', '无可用cookie']
    succeeded = bool(addr and tag and tag not in error_tags)

    result = {
        'source': NAME,
        'phone': phone,
        'status': 'success' if succeeded else 'failed',
        'addr': addr,
        'tag': tag,
        'timestamp': get_current_timestamp(),
    }

    # Only successful lookups are cached.
    if succeeded:
        cache.set(key, result, config.RESULT_EXPIRE)

    return result
def internal_error(error):
    """Handle a 500 error.

    Args:
        error: The error object given to the handler. It may be a plain
            exception without a ``description`` attribute, so the attribute
            is read defensively.

    Returns:
        A ``(response, 200)`` tuple. The error code ``'500'`` travels in the
        JSON body; ``msg`` is the error's description when it has a truthy
        one, otherwise ``'internal error'``.
    """
    # A plain exception has no ``description``; reading it directly would
    # raise AttributeError inside the error handler itself.
    msg = getattr(error, 'description', None)
    return (jsonify({
        'code': '500',
        'msg': msg or 'internal error',
        'timestamp': get_current_timestamp(),
    }), 200)
def not_allowed(error):
    """Handle a 403 error.

    Returns a ``(response, 200)`` tuple whose JSON body carries code
    ``'403'`` and the error's description, or ``'unauthorized'`` when the
    description is falsy.
    """
    description = error.description
    body = {
        'code': '403',
        'msg': description or 'unauthorized',
        'timestamp': get_current_timestamp(),
    }
    return jsonify(body), 200
def not_found(error):
    """Handle a 404 error.

    Returns a ``(response, 200)`` tuple whose JSON body carries code
    ``'404'`` and the error's description, or ``'not found'`` when the
    description is falsy.
    """
    description = error.description
    body = {
        'code': '404',
        'msg': description or 'not found',
        'timestamp': get_current_timestamp(),
    }
    return jsonify(body), 200
async def query():
    """Worker coroutine: take phones from ``PHONES`` and collect tagged results.

    Each iteration pops one phone, awaits ``get_tag_in_web`` for it, and
    appends a result dict to ``RESS`` when the cleaned tag contains ``':'``.
    The loop has no ``break`` or ``return``; it ends only when a call inside
    it raises (for example ``PHONES.pop()``).
    """
    while True:
        phone = PHONES.pop()

        # Look up the tag, the search titles and the source for this phone.
        raw_tag, titles, origin = await get_tag_in_web(phone)

        # Normalise the tag: turn ':0' into ':' and strip single quotes.
        cleaned_tag = raw_tag.replace(':0', ':')
        cleaned_tag = cleaned_tag.replace("'", '')

        record = {
            'source': origin,
            'phone': phone,
            'tag': cleaned_tag,
            'search_list': titles,
            'timestamp': get_current_timestamp(),
        }

        # A real tag normally contains ':'; anything else is not recorded.
        if ':' not in cleaned_tag:
            continue
        RESS.append(record)
def get_cookies():
    """Keep the cookie pool in redis filled using a headless Chrome session.

    A Chrome driver is started from ``/usr/local/bin/chromedriver``, falling
    back to ``config.CHROME_DRIVER_PATH`` if that fails. The function then
    loops: while ``cache.cookie_count()`` is below
    ``config.COOKIES_POOL_SIZE`` it opens the base URL, submits a search,
    stores the browser's cookies through ``cache.put`` as a name-to-value
    dict, and clears the browser cookies.

    The loop ends only when an exception escapes it. The exception is
    printed, and the browser is always shut down, including on
    ``KeyboardInterrupt``.
    """
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    base_url = config.CHROME_BASE_URL
    try:
        driver = webdriver.Chrome(
            executable_path=r'/usr/local/bin/chromedriver',
            chrome_options=chrome_options)
    except Exception:
        # Default driver location unusable; fall back to the configured path.
        driver = webdriver.Chrome(
            executable_path=config.CHROME_DRIVER_PATH,
            chrome_options=chrome_options)
    print('start .....')
    try:
        while True:
            num = cache.cookie_count()
            print(get_current_timestamp(), ':redis have', num, 'cookie ...')
            if num < config.COOKIES_POOL_SIZE:
                driver.get(base_url + "/")
                try:
                    time.sleep(1)
                    driver.find_element_by_id("query").send_keys("112")
                    driver.find_element_by_id('stb').click()
                except Exception:
                    # AJAX content not loaded yet; reopen the page.
                    continue
                cookies = {
                    item['name']: item['value']
                    for item in driver.get_cookies()
                }
                cache.put(cookies)
                print('save one cookie')
                driver.delete_all_cookies()
            # Pause every round so a full pool does not turn into a busy loop.
            time.sleep(0.6)
    except Exception as e:
        print(e)
    finally:
        # Close the browser on every exit path so no Chrome process leaks.
        driver.quit()