def monitor(keyword, batch_num, website):
    """Search 支付圈 (paycircle.cn) for `keyword` and senti-process hits.

    Takes a home-page snapshot, scans the result table, and forwards every
    row whose text contains `keyword` to senti_process_text. All errors are
    logged and swallowed; the browser is always closed.
    """
    driver = WebDriver.get_chrome()
    senti_util = SentiUtil()
    url = ("http://www.paycircle.cn/company/search.php?kw="
           + urllib.parse.quote(keyword) + "&c=SearchList&")
    if driver is None:
        # No browser available: record the failure and bail out.
        senti_util.log_error("支付圈", url, batch_num, website)
        return
    try:
        driver.get(url)
        source = driver.page_source
        senti_util.snapshot_home("支付圈", url, batch_num, website, driver)
        soup = BeautifulSoup(source, 'html.parser')
        div_list = soup.find_all(attrs={'class': 'list'})
        if div_list:  # truthiness instead of __len__() > 0
            for new in div_list[0].find_all('tr'):
                cell = new.find_all('td')[2]  # third column holds link + text
                href = cell.find_all('a')[0].get("href")
                content = cell.find_all('li')[1].get_text()
                if keyword in content:
                    senti_util.senti_process_text("支付圈", content, href,
                                                  batch_num, website)
        else:
            logger.info("支付圈没有搜索到数据: %s", keyword)
    except Exception as e:
        logger.error(e)
    finally:
        driver.quit()
def monitor(website_name, merchant_name, batch_num):
    """Search 网贷天眼 (p2peye.com) for `website_name` and senti-process
    matching result titles.

    Errors are logged and swallowed; the browser is always closed.
    """
    driver = None  # bound before try so the finally clause cannot NameError
    try:
        driver = WebDriver.get_chrome()
        senti_util = SentiUtil()
        url = ("https://www.p2peye.com/search.php?mod=zonghe&srchtxt="
               + urllib.parse.quote(website_name))
        driver.get(url)
        source = driver.page_source
        senti_util.snapshot_home("网贷天眼", merchant_name, url, batch_num, driver)
        soup = BeautifulSoup(source, 'html.parser')
        news = soup.find_all(attrs={'class': 'result-t'})
        if news:
            for new in news:
                href = new.find_all('a')[0].get("href")
                content = new.get_text()
                if website_name in content:
                    # hrefs are protocol-relative ("//..."): strip the slashes.
                    senti_util.senti_process_text(
                        "网贷天眼", merchant_name, content,
                        "http://" + href[2:], batch_num)
        else:
            logger.info("网贷天眼没有搜索到数据: %s", merchant_name)
    except Exception as e:
        logger.error(e)
    finally:
        # Original quit() could raise NameError when get_chrome() failed.
        if driver is not None:
            driver.quit()
def monitor(website_name, merchant_name, batch_num):
    """Search 交易中国 (chinaft.com.cn) news for `website_name` and
    senti-process matching headlines.

    Stops early when the batch is cancelled via gl.check_by_batch_num.
    """
    driver = None  # bound before try so the finally clause cannot NameError
    try:
        driver = WebDriver.get_chrome()
        senti_util = SentiUtil()
        url = ("http://www.chinaft.com.cn/news/search/_1.shtml?key="
               + urllib.parse.quote(website_name))
        driver.get(url)
        source = driver.page_source
        senti_util.snapshot_home("交易中国", merchant_name, url, batch_num, driver)
        soup = BeautifulSoup(source, 'html.parser')
        news = soup.find_all("div",
                             attrs={'class': 'xixi_ChinaFT_left_news_box'})
        if news:
            for new in news:
                # Batch cancelled elsewhere: stop scanning.
                if not gl.check_by_batch_num(batch_num):
                    break
                link = new.find_all('a')[1]  # second anchor is the headline
                href = link.get("href")
                content = link.get_text()
                if website_name in content:
                    senti_util.senti_process_text(
                        "交易中国", merchant_name, content,
                        "http://www.chinaft.com.cn" + href, batch_num)
        else:
            logger.info("交易中国没有搜索到数据: %s", merchant_name)
    except Exception as e:
        logger.error(e)
    finally:
        # Original quit() could raise NameError when get_chrome() failed.
        if driver is not None:
            driver.quit()
class TestWangdaitianyan(object):
    """Manual smoke test: fetch a wdzj.com article and print the search-box
    list items. Runs at import time when executed as a script."""
    if __name__ == "__main__":
        # Acquired outside the try so `driver` is always bound below
        # (original bound it inside the try, then used it after except/pass).
        driver = WebDriver.get_chrome()
        try:
            driver.get("http://www.wdzj.com/news/yc/2934681.html")
        except Exception as e:
            print(e)
        SnapshotService.create_snapshot(driver)
        time.sleep(5)
        source = driver.page_source
        soup = BeautifulSoup(source, 'html.parser')
        print(soup.find_all("ul", attrs={'class': 'so-tzbox'}))
        news = soup.find_all("ul", attrs={'class': 'so-tzbox'})[0].find_all("li")
        if news:
            for new in news:
                href = new.find_all('a')[0].get("href")
                # hrefs are protocol-relative; print without the "//" prefix.
                print(href[2:])
def monitor(keyword, batch_num, website):
    """Search 聚投诉 (ts.21cn.com) complaints for `keyword` and
    senti-process matching items."""
    driver = WebDriver.get_chrome()
    senti_util = SentiUtil()
    url = "http://ts.21cn.com/home/search?keyword=" + urllib.parse.quote(keyword)
    if driver is None:
        senti_util.log_error("聚投诉", url, batch_num, website)
        return
    try:
        driver.get(url)
        driver.implicitly_wait(3)  # results render asynchronously
        source = driver.page_source
        senti_util.snapshot_home("聚投诉", url, batch_num, website, driver)
        soup = BeautifulSoup(source, 'html.parser')
        items = soup.find_all(attrs={'class': 'complain-item'})
        if items:
            for item in items:
                link = item.find_all('a')[1]  # second anchor is the complaint
                href = link.get("href")
                content = link.get_text()
                if keyword in content:
                    # NOTE(review): base URL looks copy-pasted from the 支付圈
                    # scraper — these results come from ts.21cn.com, not
                    # paycircle.cn. Kept as-is to preserve behavior; confirm
                    # the correct domain and fix.
                    senti_util.senti_process_text(
                        "聚投诉", content,
                        "http://www.paycircle.cn" + href[1:],
                        batch_num, website)
        else:
            logger.info("聚投诉没有搜索到数据: %s", keyword)
    except Exception as e:
        logger.error(e)
    finally:
        driver.quit()
class TestWangdaitianyan(object):
    """Manual smoke test for the 交易中国 (chinaft.com.cn) search scraper:
    prints each result's URL and headline. Runs when executed as a script."""
    if __name__ == "__main__":
        driver = WebDriver.get_chrome()
        try:
            driver.get("http://www.chinaft.com.cn/news/search/_1.shtml?key=" +
                       urllib.parse.quote("京东"))
        except Exception as e:
            print("error")
        # (removed stray debug `print("e")` left over from development)
        SnapshotService.create_snapshot(driver)
        time.sleep(5)
        source = driver.page_source
        soup = BeautifulSoup(source, 'html.parser')
        news = soup.find_all("div",
                             attrs={'class': 'xixi_ChinaFT_left_news_box'})
        if news:
            for new in news:
                href = new.find_all('a')[1].get("href")
                print("http://www.chinaft.com.cn" + href)
                print(new.find_all('a')[1].get_text())
def monitor(keyword, batch_num, website):
    """Search the 支付产业网 (paynews.net) forum for `keyword` and
    senti-process matching threads."""
    driver = WebDriver.get_chrome()
    senti_util = SentiUtil()
    url = "http://paynews.net/search.php?mod=forum"
    if driver is None:
        senti_util.log_error("支付产业网", url, batch_num, website)
        return
    try:
        driver.get(url)
        # The forum has no GET search URL: type into the box and press Enter.
        search_text_blank = driver.find_element_by_id("scform_srchtxt")
        search_text_blank.send_keys(keyword)
        search_text_blank.send_keys(Keys.RETURN)
        senti_util.snapshot_home("支付产业网", url, batch_num, website, driver)
        source = driver.page_source
        soup = BeautifulSoup(source, 'html.parser')
        div_list = soup.find(attrs={'class': 'slst mtw'})
        # len(Tag) counts its children; empty container means no results.
        if div_list is not None and len(div_list) > 0:
            for new in div_list.find_all('li'):
                link = new.find_all('a')[0]
                href = link.get("href")
                content = link.get_text()
                if keyword in content:
                    senti_util.senti_process_text(
                        "支付产业网", content, "http://paynews.net/" + href,
                        batch_num, website)
        else:
            logger.info("支付产业网没有搜索到数据: %s", keyword)
    except Exception as e:
        logger.error(e)
    finally:
        driver.quit()
class TestWangdaitianyan(object):
    """Manual smoke test: drive a Baidu search and print each result's title
    and link. Runs when executed as a script."""
    if __name__ == "__main__":
        driver = WebDriver.get_chrome()
        try:
            driver.get("https://www.baidu.com/")
            driver.find_element_by_xpath('//input[@name="wd"]').send_keys(u"京东")
        except Exception as e:
            print(e)
        SnapshotService.create_snapshot(driver)
        time.sleep(5)
        source = driver.page_source
        soup = BeautifulSoup(source, 'html.parser')
        for result_table in soup.find_all('h3', class_='t'):
            a_click = result_table.find("a")
            print(a_click.get_text())        # result title
            print(str(a_click.get("href")))  # result link
def monitor(keyword, batch_num, website):
    """Search 黑猫投诉 (tousu.sina.com.cn) for `keyword` and senti-process
    matching complaint items."""
    driver = WebDriver.get_chrome()
    senti_util = SentiUtil()
    url = ("https://tousu.sina.com.cn/index/search/?keywords="
           + urllib.parse.quote(keyword) + "&t=0")
    if driver is None:
        senti_util.log_error("黑猫投诉", url, batch_num, website)
        return
    try:
        driver.get(url)
        source = driver.page_source
        senti_util.snapshot_home("黑猫投诉", url, batch_num, website, driver)
        soup = BeautifulSoup(source, 'html.parser')
        items = soup.find_all(attrs={'class': 'blackcat-con'})
        if items:
            for item in items:
                href = item.find_all('a')[0].get("href")
                content = item.find_all('h1')[0].get_text()
                if keyword in content:
                    senti_util.senti_process_text("黑猫投诉", content, href,
                                                  batch_num, website)
        else:
            logger.info("黑猫投诉没有搜索到数据: %s", keyword)
    except Exception as e:
        logger.error(e)
    finally:
        driver.quit()
def monitor(keyword, batch_num, website):
    """Look up `keyword` on 百度百科 (Baidu Baike).

    If an entry exists, its meta description is senti-processed; otherwise
    only a snapshot of the miss page is taken.
    """
    driver = WebDriver.get_chrome()
    senti_util = SentiUtil()
    url = 'https://baike.baidu.com/item/%s' % urllib.parse.quote(keyword)
    if driver is None:
        senti_util.log_error("百度百科", url, batch_num, website)
        return
    try:
        driver.get(url)
        source = driver.page_source
        soup = BeautifulSoup(source, 'html.parser')
        # A "sorryCont" paragraph marks Baike's entry-not-found page.
        check_exist = soup.find_all(name='p',
                                    attrs={'class': re.compile('sorryCont')})
        if not check_exist:
            description = soup.find(attrs={"name": "description"})['content']
            senti_util.senti_process_text("百度百科", description, url,
                                          batch_num, website)
        else:
            senti_util.snapshot_home("百度百科", url, batch_num, website, driver)
            logger.info("百度百科没有搜索到数据: %s", keyword)
    except Exception as e:
        logger.error(e)
    finally:
        driver.quit()
def monitor(keyword, batch_num, website):
    """Run a Baidu search for `keyword` and senti-process result titles that
    contain the keyword."""
    driver = WebDriver.get_chrome()
    senti_util = SentiUtil()
    url = "https://www.baidu.com/"
    if driver is None:
        senti_util.log_error("百度搜索", url, batch_num, website)
        return
    try:
        driver.get(url)
        search_text_blank = driver.find_element_by_id("kw")
        search_text_blank.send_keys(keyword)
        search_text_blank.send_keys(Keys.RETURN)
        time.sleep(5)  # give the results page time to render
        senti_util.snapshot_home("百度搜索", url, batch_num, website, driver)
        source = driver.page_source
        soup = BeautifulSoup(source, 'html.parser')
        for result_table in soup.find_all('h3', class_='t'):
            a_click = result_table.find("a")
            title = a_click.get_text()
            if keyword in title:
                senti_util.senti_process_text("百度搜索", title,
                                              str(a_click.get("href")),
                                              batch_num, website)
    except Exception as e:
        logger.error(e)
    finally:
        driver.quit()
class TestWangdaitianyan(object):
    """Manual smoke test for the 网贷巴士 (wangdaibus.com) search scraper:
    logs each result's URL and text. Runs when executed as a script."""
    if __name__ == "__main__":
        driver = WebDriver.get_chrome()
        try:
            # "%E4%BA%AC%E4%B8%9C" is the URL-encoded form of 京东.
            driver.get(
                "http://www.wangdaibus.com/search/list?subject=%E4%BA%AC%E4%B8%9C"
            )
            time.sleep(10)  # the result list renders asynchronously
        except Exception as e:
            logger.error(e)
        SnapshotService.create_snapshot(driver)
        source = driver.page_source
        soup = BeautifulSoup(source, 'html.parser')
        news = soup.find_all("h3", attrs={'class': 'xs3'})
        if news:
            for new in news:
                href = new.find_all('a')[0].get("href")
                logger.info("http://www.wangdaibus.com/" + href)
                logger.info(new.get_text())
def monitor(keyword, batch_num, website):
    """Search 裁判文书网 (wenshu.court.gov.cn) judgements for `keyword` and
    senti-process matching case titles."""
    driver = WebDriver.get_chrome()
    senti_util = SentiUtil()
    url = "http://wenshu.court.gov.cn/"
    if driver is None:
        # Fixed: this error was previously tagged "黑猫投诉" (copy-paste);
        # every other record in this scraper uses "裁判文书网".
        senti_util.log_error("裁判文书网", url, batch_num, website)
        return
    try:
        driver.get(url)
        # No GET search URL: type into the box and press Enter.
        search_text_blank = driver.find_element_by_xpath(
            "//*[@class='searchKey search-inp']")
        search_text_blank.send_keys(keyword)
        search_text_blank.send_keys(Keys.RETURN)
        time.sleep(3)  # give the result list time to load
        source = driver.page_source
        senti_util.snapshot_home("裁判文书网", url, batch_num, website, driver)
        soup = BeautifulSoup(source, 'html.parser')
        for a_tag in soup.find_all('a', class_='caseName'):
            href = a_tag.get("href")
            title = a_tag.get_text()
            if keyword in title:
                senti_util.senti_process_text(
                    "裁判文书网", title,
                    "http://wenshu.court.gov.cn/website/wenshu" + href[2:],
                    batch_num, website)
    except Exception as e:
        logger.error(e)
    finally:
        driver.quit()
def monitor(keyword, website_name, batch_num, merchant_name, merchant_num):
    """Search Baidu Tieba for `keyword` and senti-process matching thread
    titles."""
    driver = WebDriver.get_chrome()
    try:
        senti_util = SentiUtil()
        url = ("http://tieba.baidu.com/f?fr=wwwt&kw="
               + urllib.parse.quote(keyword))
        driver.get(url)
        senti_util.snapshot_home("百度贴吧", website_name, url, batch_num,
                                 merchant_name, merchant_num, driver)
        source = driver.page_source
        soup = BeautifulSoup(source, 'html.parser')
        # Trailing space in the class string matches the site's markup.
        news = soup.find_all(
            "div", attrs={'class': 'threadlist_title pull_left j_th_tit '})
        if news:
            for new in news:
                link = new.find_all('a')[0]
                href = link.get("href")
                content = link.get_text()
                if keyword in content:
                    senti_util.senti_process_text(
                        "百度贴吧", website_name, content,
                        "http://tieba.baidu.com" + href, batch_num,
                        merchant_name, merchant_num)
        else:
            logger.info("百度贴吧没有搜索到数据: %s", keyword)
    except Exception as e:
        logger.error(e)
    finally:
        # Guard: get_chrome() may return None (other scrapers check for it).
        if driver is not None:
            driver.quit()
def senti_process_text(platform, text, href, batch_num, website):
    """Screenshot `href` and record sensitive-keyword hits found in `text`.

    One "异常" MonitorThird row is stored per matched keyword; for the
    百度百科 platform a single "正常" row is stored when nothing matched.
    Errors are logged and swallowed; the browser is always closed.
    """
    driver = WebDriver.get_chrome()
    keyword_dao = KeywordDao()
    monitor_third_dao = MonitorThirdDao()

    def _new_record():
        # One result row pre-filled with the merchant/site context.
        record = MonitorThird()
        record.website_name = website.website_name
        record.merchant_num = website.merchant_num
        record.merchant_name = website.merchant_name
        record.domain_name = website.domain_name
        record.saler = website.saler
        record.batch_num = batch_num
        record.url = href
        record.type = platform
        return record

    try:
        driver.get(href)
        snapshot = SnapshotService.create_snapshot(driver, batch_num,
                                                   website, '舆情')
        is_normal = "正常"
        for keyword in keyword_dao.get_all():
            if text.find(keyword.name) != -1:
                is_normal = "异常"
                monitor_third = _new_record()
                monitor_third.is_normal = is_normal
                monitor_third.level = '高'
                monitor_third.outline = '检测到敏感词:' + str(keyword.name)
                monitor_third.snapshot = snapshot
                monitor_third_dao.add(monitor_third)
        if is_normal == "正常" and platform == "百度百科":
            # Original code reused the record object from the last loop
            # iteration here, which raised NameError whenever the keyword
            # table was empty; build a fresh record instead.
            monitor_third = _new_record()
            monitor_third.level = '-'
            monitor_third.outline = '-'
            monitor_third.is_normal = is_normal
            monitor_third.snapshot = snapshot
            monitor_third_dao.add(monitor_third)
    except ConnectionError as conn_error:
        logger.error(conn_error)
    except Exception as e:
        logger.error(e)
    finally:
        driver.quit()
def simulation_404(url):
    """Render the local 404 page for `url`, save a screenshot and a 50x50
    thumbnail, and return the snapshot file name.

    Always returns the generated file name, even if rendering failed.
    """
    timestamp = str(time.time())
    snapshot = timestamp + ".png"
    path = ims_rest_base + "/views/system/404.jsp?url=" + str(url)
    img_404 = base_filepath + "/" + timestamp
    driver = None  # bound before try so the finally clause cannot NameError
    try:
        driver = WebDriver.get_chrome()
        driver.get(path)
        driver.save_screenshot(img_404 + ".png")
        im = Image.open(img_404 + ".png")
        im_resize = im.resize((50, 50), Image.ANTIALIAS)
        im_resize.save(img_404 + "_thumb.bmp")
    except Exception as e:
        logger.info(e)
    finally:
        if driver is not None:
            driver.quit()
    # Single exit point (original had a duplicate return in the except path).
    return snapshot
def monitor(website_name, merchant_name, batch_num):
    """Search 网贷之家 (wdzj.com) for `website_name` and senti-process
    matching result items.

    Stops early when the batch is cancelled via gl.check_by_batch_num.
    """
    driver = None  # bound before try so the finally clause cannot NameError
    try:
        driver = WebDriver.get_chrome()
        senti_util = SentiUtil()
        url = ("https://www.wdzj.com/front/search/index?key="
               + urllib.parse.quote(website_name))
        driver.get(url)
        source = driver.page_source
        senti_util.snapshot_home("网贷之家", merchant_name, url, batch_num, driver)
        soup = BeautifulSoup(source, 'html.parser')
        tzbox = soup.find_all("ul", attrs={'class': 'so-tzbox'})
        if not tzbox:
            return
        news = tzbox[0].find_all("li")
        if news:
            for new in news:
                if not gl.check_by_batch_num(batch_num):
                    break  # batch cancelled elsewhere
                href = new.find_all('a')[0].get("href")
                content = new.get_text()
                if website_name in content:
                    # hrefs are protocol-relative ("//..."): strip the slashes.
                    senti_util.senti_process_text("网贷之家", merchant_name,
                                                  content,
                                                  "http://" + href[2:],
                                                  batch_num)
        else:
            logger.info("网贷之家没有搜索到数据: %s", merchant_name)
    except Exception as e:
        logger.error(e)
    finally:
        # Original quit() could raise NameError when get_chrome() failed.
        if driver is not None:
            driver.quit()
def monitor(website_name, merchant_name, batch_num):
    """Search 网贷巴士 (wangdaibus.com) for `website_name` and senti-process
    matching threads.

    Stops early when the batch is cancelled via gl.check_by_batch_num.
    """
    driver = None  # bound before try so the finally clause cannot NameError
    try:
        driver = WebDriver.get_chrome()
        senti_util = SentiUtil()
        url = ("http://www.wangdaibus.com/search/list?subject="
               + urllib.parse.quote(website_name))
        driver.get(url)
        time.sleep(10)  # the result list renders asynchronously
        senti_util.snapshot_home("网贷巴士", merchant_name, url, batch_num, driver)
        source = driver.page_source
        soup = BeautifulSoup(source, 'html.parser')
        news = soup.find_all("h3", attrs={'class': 'xs3'})
        if news:
            for new in news:
                if not gl.check_by_batch_num(batch_num):
                    break  # batch cancelled elsewhere
                href = new.find_all('a')[0].get("href")
                content = new.get_text()
                if website_name in content:
                    senti_util.senti_process_text(
                        "网贷巴士", merchant_name, content,
                        "http://www.wangdaibus.com/" + href, batch_num)
        else:
            logger.info("网贷巴士没有搜索到数据: %s", merchant_name)
    except Exception as e:
        logger.error(e)
    finally:
        # Original quit() could raise NameError when get_chrome() failed.
        if driver is not None:
            driver.quit()
def monitor(keyword, batch_num, website):
    """Search 支付界 (zhifujie.com) news for `keyword` and senti-process
    matching items."""
    driver = WebDriver.get_chrome()
    senti_util = SentiUtil()
    url = "http://www.zhifujie.com/search/search"
    if driver is None:
        senti_util.log_error("支付界", url, batch_num, website)
        return
    try:
        driver.get(url)
        # No GET search URL: type the keyword and click the 搜索 button.
        search_text_blank = driver.find_element_by_id("searchbox")
        search_text_blank.send_keys(keyword)
        driver.find_element_by_xpath(
            '//button[contains(text(), "搜索")]').click()
        time.sleep(5)  # give the results time to render
        source = driver.page_source
        senti_util.snapshot_home("支付界", url, batch_num, website, driver)
        soup = BeautifulSoup(source, 'html.parser')
        items = soup.find_all(attrs={'class': 'main-news-content-item'})
        if items:
            for item in items:
                link = item.find_all('a')[1]
                href = link.get("href")
                content = link.get_text()
                if keyword in content:
                    # NOTE(review): base URL looks copy-pasted from the 支付圈
                    # scraper (paycircle.cn) although results come from
                    # zhifujie.com. Kept to preserve behavior; confirm and fix.
                    senti_util.senti_process_text(
                        "支付界", content,
                        "http://www.paycircle.cn" + href[1:],
                        batch_num, website)
        else:
            logger.info("支付界没有搜索到数据: %s", keyword)
    except Exception as e:
        logger.error(e)
        # Best-effort snapshot of whatever state the page is in on failure.
        senti_util.snapshot_home("支付界", url, batch_num, website, driver)
    finally:
        driver.quit()
def monitor_website(weburl, batch_num):
    """Content checks for a single URL record.

    Pipeline: dead-link probe (direct, then via proxy) → page snapshot →
    sensitive-word scan → forbidden-feature scan (充值/提现/钱包) →
    misleading-advertising scan. Each finding is stored as its own
    MonitorUrl row; failures are logged, never raised.
    """
    keyword_dao = KeywordDao()
    keywords = keyword_dao.get_all()
    access = AccessibleService()
    monitor_weburl_dao = MonitorWeburlDao()

    # Base record reused (re-filled) for every finding on this URL.
    monitor_weburl = MonitorUrl()
    monitor_weburl.website_name = weburl.website_name
    monitor_weburl.domain_name = weburl.domain_name
    monitor_weburl.merchant_name = weburl.merchant_name
    monitor_weburl.merchant_num = weburl.merchant_num
    monitor_weburl.saler = weburl.saler
    monitor_weburl.url = weburl.url
    monitor_weburl.batch_num = batch_num
    monitor_weburl.title = weburl.title

    # Dead-link detection: try directly first, then through the proxy.
    reachable, current_url = access.get_access_res(weburl.url)
    use_proxy = False
    if reachable is None:
        logger.info("使用代理重试访问: %s", weburl.url)
        reachable, current_url = access.get_proxy_access_res(weburl.url)
        use_proxy = True
    else:
        logger.info("不使用代理可以访问: %s", weburl.url)
    if reachable is None:
        logger.info("检测到误404 : %s", weburl.url)
        monitor_weburl.outline = '检测到误404'
        monitor_weburl.is_normal = '异常'
        monitor_weburl.level = '高'
        monitor_weburl.snapshot = SnapshotService.simulation_404(weburl.url)
        monitor_weburl.kinds = '死链接'
        monitor_weburl_dao.add(monitor_weburl)
        return
    logger.info("url可以访问: %s", weburl.url)

    # Screenshot with whichever transport proved reachable.
    if use_proxy:
        driver = WebDriver.get_proxy_chrome()
    else:
        driver = WebDriver.get_chrome()
    try:
        driver.get(weburl.url)
        snapshot = SnapshotService.snapshot_weburl(driver, batch_num, weburl,
                                                   '网站内容')
        # (removed leftover debug print(snapshot) / print(monitor_weburl))
        monitor_weburl.outline = '网页打开正常'
        monitor_weburl.is_normal = '正常'
        monitor_weburl.level = '-'
        monitor_weburl.snapshot = snapshot
        monitor_weburl.kinds = '是否能打开'
        monitor_weburl_dao.add(monitor_weburl)
        source = driver.page_source
        soup = BeautifulSoup(source, 'html.parser')

        # Sensitive-word scan.
        # NOTE(review): soup.find(x) searches for a *tag* named x, not page
        # text, so these scans likely never match as intended; the text
        # search probably belongs on `source`. Behavior kept — confirm.
        for keyword in keywords:
            index = soup.find(keyword.name)
            if index is not None:
                logger.info("senti url alert,there is : %s", str(keyword.name))
                monitor_weburl.outline = '检测到敏感词:' + str(keyword.name)
                monitor_weburl.is_normal = '异常'
                monitor_weburl.level = '低'
                monitor_weburl.snapshot = snapshot
                monitor_weburl.kinds = '命中敏感词'
                monitor_weburl_dao.add(monitor_weburl)

        # Non-financial platforms must not offer recharge/withdraw/wallet.
        illegal_fun = soup.find("充值")
        if illegal_fun is not None:
            logger.info("senti url alert,there is : %s", str("充值"))
            monitor_weburl.outline = '检测到包含充值、提现、钱包功能'
            monitor_weburl.is_normal = '异常'
            monitor_weburl.level = '低'
            monitor_weburl.snapshot = snapshot
            monitor_weburl.kinds = '非法功能'
            monitor_weburl_dao.add(monitor_weburl)

        # Misleading-advertising scan.
        mislead1 = soup.find("融宝资金担保")
        mislead2 = soup.find("融宝托管")
        if mislead1 is not None or mislead2 is not None:
            monitor_weburl.outline = '检测到误导宣传'
            monitor_weburl.is_normal = '异常'
            monitor_weburl.level = '中'
            monitor_weburl.snapshot = snapshot
            monitor_weburl.kinds = '误导宣传'
            monitor_weburl_dao.add(monitor_weburl)
    except Exception as e:
        logger.error(e)
    finally:
        driver.quit()
def monitor_website(website, batch_num):
    """Home-page availability check for every domain of a merchant website.

    For each comma-separated domain: probe reachability, fetch traffic rank,
    screenshot the page, and store a MonitorWebsite row describing the
    outcome. Merchants with no domain get a single '无法获取' row.
    """
    # NOTE(review): missing parentheses — this binds the DAO *class*, not an
    # instance; add() below is therefore called on the class. Confirm whether
    # MonitorWebsiteDao.add works that way or this should be MonitorWebsiteDao().
    monitor_website_dao = MonitorWebsiteDao
    if len(website.domain_name) == 0:
        # No domain on record: store a placeholder "cannot check" row and stop.
        logger.info("website_domain is None! merchant_name: %s ",
                    website.merchant_name)
        monitor_website = MonitorWebsite()
        monitor_website.website_name = website.website_name
        monitor_website.merchant_name = website.merchant_name
        monitor_website.merchant_num = website.merchant_num
        monitor_website.domain_name = website.domain_name
        monitor_website.saler = website.saler
        monitor_website.batch_num = batch_num
        monitor_website.kinds = "首页是否可打开"
        monitor_website.level = '-'
        monitor_website.access = '异常'
        monitor_website.is_normal = '无法获取'
        monitor_website.outline = '商户域名为空。'
        monitor_website.level = '-'  # NOTE(review): duplicate assignment
        monitor_website.pageview = '-'
        monitor_website_dao.add(monitor_website)
        return
    else:
        logger.info("website_domain is not None! merchant_name: %s ",
                    website.domain_name)
    # Home-page monitoring: one shared PhantomJS driver for all domains.
    driver = WebDriver.get_phantomjs()
    service = TrafficService()
    access = AccessibleService()
    domain_names = str(website.domain_name)
    domain_name_list = domain_names.split(",")
    for domain_name in domain_name_list:
        try:
            logger.info("-------------------")
            logger.info("check whether website available,domain_name : %s",
                        website.domain_name)
            # Fresh result row for this domain.
            monitor_website = MonitorWebsite()
            monitor_website.website_name = website.website_name
            monitor_website.merchant_name = website.merchant_name
            monitor_website.merchant_num = website.merchant_num
            monitor_website.saler = website.saler
            monitor_website.domain_name = domain_name
            monitor_website.batch_num = batch_num
            monitor_website.kinds = "首页是否可打开"
            monitor_website.level = '-'
            monitor_website.snapshot = ""
            logger.info("预留使用代理入口...")
            # Reserved proxy entry point (currently disabled):
            # domain_name_rich, current_url = access.get_proxy_access_res(domain_name)
            # if domain_name_rich is None:
            #     logger.info("不使用代理重试访问: %s", domain_name)
            #     domain_name_rich, current_url = access.get_access_res(domain_name)
            # else:
            #     logger.info("使用代理可以访问: %s", domain_name_rich)
            domain_name_rich, current_url = access.get_access_res(
                domain_name)
            logger.info("domain_name: %s", domain_name)
            logger.info("domain_name_rich: %s", domain_name_rich)
            logger.info("current_url: %s", current_url)
            if domain_name_rich is not None:
                # Reachable: record success, then try to render + screenshot.
                logger.info("domain : %s", str(domain_name_rich))
                monitor_website.access = '正常'
                monitor_website.is_normal = '正常'
                monitor_website.outline = '正常'
                monitor_website.level = '-'
                monitor_website.pageview = '-'
                monitor_website.batch_num = batch_num
                pageview = service.get_traffic(
                    domain_name=domain_name_rich)
                monitor_website.pageview = pageview.reach_rank[0]
                try:
                    driver.get(domain_name_rich)
                    title = driver.title
                    snapshot = SnapshotService.create_snapshot(
                        driver, batch_num, website, '网站')
                    monitor_website.snapshot = snapshot
                    # Hosting-provider placeholder titles mean the site is
                    # effectively down / unregistered.
                    if title == '没有找到站点' or title == '未备案提示':
                        monitor_website.access = '异常'
                        monitor_website.is_normal = '异常'
                        monitor_website.outline = title
                        monitor_website.level = '高'
                        monitor_website_dao.add(monitor_website)
                    else:
                        monitor_website_dao.add(monitor_website)
                except Exception as e:
                    # Render/screenshot failed although the probe succeeded.
                    logger.info(e)
                    monitor_website.access = '异常'
                    monitor_website.is_normal = '异常'
                    monitor_website.outline = '首页访问检测到异常'
                    monitor_website.level = '高'
                    monitor_website.pageview = '-'
                    monitor_website.snapshot = SnapshotService.simulation_404(
                        domain_name)
                    monitor_website.batch_num = batch_num
                    monitor_website_dao.add(monitor_website)
            else:
                # Probe failed: record the failure, with either a simulated
                # 404 snapshot or a real screenshot of the redirect target.
                monitor_website.access = '异常'
                monitor_website.is_normal = '异常'
                monitor_website.outline = '首页访问检测到异常'
                monitor_website.level = '高'
                monitor_website.pageview = '-'
                monitor_website.batch_num = batch_num
                if current_url is None:
                    logger.info("snapshot 404")
                    monitor_website.snapshot = SnapshotService.simulation_404(
                        domain_name)
                else:
                    # A redirect target exists: screenshot it with Chrome.
                    chrome_driver = WebDriver.get_chrome()
                    try:
                        chrome_driver.get(current_url)
                        snapshot = SnapshotService.create_snapshot(
                            chrome_driver, batch_num, website, '网站')
                        monitor_website.snapshot = snapshot
                    except Exception as e:
                        logger.error(e)
                        index = str(e).find("timeout")
                        if index != -1:
                            logger.info("访问超时")
                            monitor_website.outline = '访问超时'
                            monitor_website.snapshot = SnapshotService.simulation_404(
                                current_url)
                        else:
                            monitor_website.outline = str(e)
                            monitor_website.snapshot = SnapshotService.simulation_404(
                                current_url)
                        monitor_website_dao.add(monitor_website)
                        # NOTE(review): only exit that returns a tuple; other
                        # paths return None — callers should not rely on it.
                        return None, None
                    finally:
                        chrome_driver.quit()
                logger.info("website is not available : %s return!",
                            domain_name)
                monitor_website_dao.add(monitor_website)
                # Unreachable domain aborts the whole loop.
                return
        except Exception as e:
            logger.info(e)
            monitor_website.access = '异常'
            monitor_website.is_normal = '异常'
            monitor_website.outline = '巡检系统异常,建议手动重试!'
            monitor_website.level = '高'
            monitor_website_dao.add(monitor_website)
        finally:
            # NOTE(review): quits the shared PhantomJS driver on EVERY loop
            # iteration — a second domain would reuse a closed driver. Confirm
            # whether this should run after the loop instead.
            driver.quit()
def monitor(task_id, status):
    """Poll 51tracking for every tracking number of `task_id` in `status`.

    Logs into trackingmore, then for each tracking detail: opens the number's
    page, screenshots it, reads the JSON status endpoint, and updates the
    detail row. Global flags (gl 'STATUS'/'TRACKING_STATUS') allow an external
    stop; they are cleared when the task ends.
    """
    ims_api = ImsApi()
    tracking_dao = TrackingDetailDao()
    # track_status code → human-readable description.
    status_dict = {'0': '查询中', '1': '查询不到', '2': '运输途中',
                   '3': '到达待取', '4': '成功签收', '5': '运输过久',
                   '6': '投递失败', '7': '可能异常'}
    # Codes considered a "normal" (non-alert) outcome.
    normal_status_dict = {'0': '查询中', '1': '查询不到', '2': '运输途中',
                          '3': '到达待取', '4': '成功签收', '5': '运输过久'}
    tracking_details = tracking_dao.get_by_task(task_id, status)
    if tracking_details.__len__() > 0:
        try:
            driver = WebDriver.get_chrome()
            # Log into the tracking dashboard once for the whole batch.
            # NOTE(review): credentials are hard-coded here — move to config.
            driver.get("https://www.trackingmore.com/login-cn.html")
            driver.find_element_by_id("email").send_keys("*****@*****.**")
            driver.find_element_by_id("password").send_keys("0418YXYwlx")
            driver.find_element_by_id("login_test").click()
            time.sleep(5)
            for tracking_detail in tracking_details:
                # External stop flag: clear globals, notify IMS, and abort.
                if gl.get_value('TRACKING_STATUS'):
                    pass
                else:
                    logger.info("快递单任务已停止,任务id:%s", task_id)
                    gl.set_value('STATUS', False)
                    gl.set_value('TRACKING_STATUS', False)
                    ims_api.done_tracking(task_id)
                    return
                tracking_detail.start_time = datetime.datetime.now()
                tracking_detail.status = "done"
                logger.info("准备检查单号:%s ", tracking_detail.tracking_num)
                try:
                    # Open the number's result page and screenshot it.
                    driver.get(
                        "https://my.51tracking.com/numbers.php?lang=cn&keywordType=trackNumber&p=1&searchnumber="
                        + tracking_detail.tracking_num)
                    driver.maximize_window()
                    time.sleep(3)
                    # driver.find_element_by_class_name("show_lastEvent").click()
                    driver.find_element_by_id('trackItem_0').click()
                    time.sleep(1)
                    snapshot = SnapshotService.snapshot_tracking(driver,
                                                                 tracking_detail)
                    # Read the raw JSON status from the data endpoint.
                    url = "https://my.51tracking.com/data/data-numbers.php?lang=cn&action=get_my_number" \
                          "&source=2&where=lang%3Dcn%26p%3D1%26keywordType%3DtrackNumber%26searchnumber%3D" \
                          + tracking_detail.tracking_num + "&page=1"
                    driver.get(url)
                    json_data = driver.find_element_by_tag_name("body").text
                    json_obj = json.loads(str(json_data))
                    # NOTE(review): rebinding `status` shadows the parameter
                    # from here on.
                    status = json_obj['data'][0]['track_status']
                    tracking_detail.des = status_dict[status]
                    tracking_detail.end_time = datetime.datetime.now()
                    tracking_detail.url = ""
                    tracking_detail.snapshot = snapshot
                    # Membership test against the dict's keys.
                    if status in normal_status_dict:
                        logger.info("单号巡检状态:%s", status)
                        tracking_detail.result = "true"
                    else:
                        tracking_detail.result = "false"
                    tracking_dao.update(tracking_detail)
                except Exception as e:
                    # Per-number failure: mark for manual verification and
                    # continue with the next number.
                    logger.error(e)
                    tracking_detail.result = "false"
                    tracking_detail.des = "检测疑似异常,建议手动验证!"
                    tracking_detail.end_time = datetime.datetime.now()
                    tracking_detail.url = ""
                    tracking_detail.snapshot = ""
                    tracking_dao.update(tracking_detail)
                # Throttle: 10 minutes between numbers.
                time.sleep(600)
        except Exception as e:
            # Batch-level failure (e.g. login): mark the current detail.
            logger.error(e)
            tracking_detail.result = "false"
            tracking_detail.des = "检测疑似异常,建议手动验证!"
            tracking_detail.end_time = datetime.datetime.now()
            tracking_detail.url = ""
            tracking_detail.snapshot = ""
            tracking_dao.update(tracking_detail)
        finally:
            # NOTE(review): raises NameError if get_chrome() itself failed,
            # since `driver` is bound inside the try — confirm and guard.
            driver.quit()
    else:
        logger.info("单号任务没有需要检索的单号,任务id:%s,单号状态: %s",
                    task_id, status)
        gl.set_value('STATUS', False)
        gl.set_value('TRACKING_STATUS', False)
        ims_api.done_tracking(task_id)
    # Always clear the global run flags when the task finishes.
    gl.set_value('STATUS', False)
    gl.set_value('TRACKING_STATUS', False)