def snapshot_qichacha(batch_num, url, website):
    """Open *url* in a local Chrome driver and capture evidence screenshots.

    Saves a full-page PNG named ``<batch>_<merchant>_<num>_工商_<ts>.png``
    under ``base_filepath`` plus a cropped ``_thumb.bmp`` preview.

    Returns:
        (driver, snapshot_filename) on success — the live driver is handed
        back so the caller can keep browsing with it; (None, None) on failure.
    """
    timestamp = int(time.time())
    # Shared naming scheme: batch_merchantName_merchantNum_工商_timestamp
    base_name = (batch_num + "_" + website.merchant_name + "_" +
                 website.merchant_num + "_工商_" + str(timestamp))
    snapshot = base_name + ".png"
    path = base_filepath + "/" + base_name
    driver = None
    try:
        driver = WebDriver.get_chrome_by_local()
        driver.set_page_load_timeout(60)
        driver.set_script_timeout(60)
        driver.maximize_window()
        driver.get(url)
        driver.save_screenshot(path + ".png")
        img = Image.open(path + ".png")
        # Fixed crop box used project-wide for the thumbnail preview.
        thumb = img.crop((265, 158, 420, 258))
        thumb.save(path + "_thumb.bmp")
        return driver, snapshot
    except Exception as e:
        # Log at error level (the original used info for a failure).
        logger.error(e)
        # FIX: the original returned without quitting, leaking a Chrome
        # process on every failed snapshot. Best-effort cleanup here.
        if driver is not None:
            try:
                driver.quit()
            except Exception:
                pass
        return None, None
def get_merchant_url(batch_num, website):
    """Search qichacha.com for the merchant and return its detail-page href.

    Flow: random anti-bot delay -> search by ``website.merchant_name`` ->
    parse the first result. If the page is a login wall / 405 (anti-scraping
    block), or the first result does not match the merchant name exactly, a
    ``MonitorBc`` record with a diagnostic screenshot is stored and None is
    returned.

    Returns:
        str: the stripped href of the matching company, or None.
    """
    monitor_bc_dao = MonitorBcDao()
    url = "https://www.qichacha.com"
    driver = WebDriver.get_chrome_by_local()
    driver.set_page_load_timeout(60)
    driver.set_script_timeout(60)
    driver.maximize_window()
    timestamp = int(time.time())
    # Same evidence-file naming scheme as snapshot_qichacha.
    base_name = (batch_num + "_" + website.merchant_name + "_" +
                 website.merchant_num + "_工商_" + str(timestamp))
    snapshot = base_name + ".png"
    path = base_filepath + "/" + base_name

    def _save_evidence():
        # Screenshot + cropped thumbnail kept as evidence for the record.
        driver.save_screenshot(path + ".png")
        img = Image.open(path + ".png")
        img.crop((265, 158, 420, 258)).save(path + "_thumb.bmp")

    def _record(is_normal, outline):
        # Persist one MonitorBc row describing this check's outcome.
        monitor_bc_dao.add(MonitorBc(batch_num=batch_num,
                                     merchant_name=website.merchant_name,
                                     merchant_num=website.merchant_num,
                                     website_name=website.website_name,
                                     domain_name=website.domain_name,
                                     saler=website.saler,
                                     snapshot=snapshot,
                                     is_normal=is_normal,
                                     kinds='企业是否可查',
                                     level='-',
                                     outline=outline,
                                     create_time=datetime.datetime.now()))

    try:
        # Random pause to look less like a bot to qichacha's anti-scraping.
        random_seconds = random.randint(20, 30)
        logger.info("企查查随机等待 %s 秒...", str(random_seconds))
        time.sleep(random_seconds)
        driver.get(url)
        driver.find_element_by_id("searchkey").send_keys(website.merchant_name)
        driver.find_element_by_id("V3_Search_bt").click()
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        title = soup.find(name="title")
        # Login wall or HTTP 405 title means we were blocked -> abnormal.
        if title is None or str(title.get_text()) in ("会员登录 - 企查查", "405"):
            logger.info("qichacha res title :%s", str(title))
            _save_evidence()
            _record('异常', '由于企查查反扒策略无法获取企业详情链接地址。')
            return None
        # First <a> in the third cell of the first result row is the company.
        tbodys = soup.find_all(id="search-result")
        trs = tbodys[0].find_all('tr')
        tds = trs[0].find_all('td')
        a = tds[2].find_all('a')
        name = a[0].get_text().strip()
        href = a[0].get('href')
        # FIX: the original tested `str(href) is not None`, which is always
        # True (str() never returns None); test href itself.
        if name == website.merchant_name.strip() and href is not None:
            return href.strip()
        _save_evidence()
        _record('正常', '企查查没有查询到商户公司')
        return None
    except Exception as e:
        logger.error(e)
        _save_evidence()
        _record('异常', '由于企查查反扒策略无法获取企业详情链接地址。建议手动进行验证。')
        return None
    finally:
        # Unlike snapshot_qichacha, this function never hands the driver
        # back to the caller, so it is always closed here.
        if driver is not None:
            driver.quit()