def monitor_website(website, batch_num):
        """Check whether each home page of ``website`` opens and store the result.

        ``website.domain_name`` is split on commas and every entry is probed
        with ``AccessibleService.get_access_res``. Each inspected domain gets a
        ``MonitorWebsite`` row saved through ``MonitorWebsiteDao.add``. An
        empty (or ``None``) ``domain_name`` yields a single row and nothing is
        probed.

        The PhantomJS driver is created once and quit once after the loop, so
        every domain in the list is checked with a live browser session.

        Args:
            website: Source record; ``website_name``, ``merchant_name``,
                ``merchant_num``, ``domain_name`` and ``saler`` are read.
            batch_num: Batch identifier copied onto each stored row and passed
                to ``SnapshotService.create_snapshot``.

        Returns:
            ``None`` on every path except one: ``(None, None)`` is returned
            when taking the Chrome snapshot of an unreachable site raises.
        """
        monitor_website_dao = MonitorWebsiteDao
        if not website.domain_name:
            logger.info("website_domain is None! merchant_name: %s ",
                        website.merchant_name)
            monitor_website = MonitorWebsite()
            monitor_website.website_name = website.website_name
            monitor_website.merchant_name = website.merchant_name
            monitor_website.merchant_num = website.merchant_num
            monitor_website.domain_name = website.domain_name
            monitor_website.saler = website.saler
            monitor_website.batch_num = batch_num
            monitor_website.kinds = "首页是否可打开"
            monitor_website.level = '-'
            monitor_website.access = '异常'
            monitor_website.is_normal = '无法获取'
            monitor_website.outline = '商户域名为空。'
            monitor_website.pageview = '-'
            monitor_website_dao.add(monitor_website)
            return
        logger.info("website_domain is not None! merchant_name: %s ",
                    website.domain_name)
        # Home-page monitoring
        driver = WebDriver.get_phantomjs()
        try:
            service = TrafficService()
            access = AccessibleService()
            for domain_name in str(website.domain_name).split(","):
                monitor_website = MonitorWebsite()
                try:
                    logger.info("-------------------")
                    logger.info("check whether website available,domain_name : %s",
                                domain_name)
                    monitor_website.website_name = website.website_name
                    monitor_website.merchant_name = website.merchant_name
                    monitor_website.merchant_num = website.merchant_num
                    monitor_website.saler = website.saler
                    monitor_website.domain_name = domain_name
                    monitor_website.batch_num = batch_num
                    monitor_website.kinds = "首页是否可打开"
                    monitor_website.level = '-'
                    monitor_website.snapshot = ""
                    logger.info("预留使用代理入口...")
                    # Proxy access is not wired in here; the site is probed directly.
                    domain_name_rich, current_url = access.get_access_res(
                        domain_name)
                    logger.info("domain_name: %s", domain_name)
                    logger.info("domain_name_rich: %s", domain_name_rich)
                    logger.info("current_url: %s", current_url)
                    if domain_name_rich is not None:
                        logger.info("domain : %s", str(domain_name_rich))
                        monitor_website.access = '正常'
                        monitor_website.is_normal = '正常'
                        monitor_website.outline = '正常'
                        # Placeholder kept in case the traffic lookup raises.
                        monitor_website.pageview = '-'
                        pageview = service.get_traffic(
                            domain_name=domain_name_rich)
                        monitor_website.pageview = pageview.reach_rank[0]
                        try:
                            # Take a screenshot of the home page
                            driver.get(domain_name_rich)
                            title = driver.title
                            monitor_website.snapshot = SnapshotService.create_snapshot(
                                driver, batch_num, website, '网站')
                            if title in ('没有找到站点', '未备案提示'):
                                # The page loaded but is a hosting placeholder.
                                monitor_website.access = '异常'
                                monitor_website.is_normal = '异常'
                                monitor_website.outline = title
                                monitor_website.level = '高'
                            monitor_website_dao.add(monitor_website)
                        except Exception as e:
                            logger.info(e)
                            monitor_website.access = '异常'
                            monitor_website.is_normal = '异常'
                            monitor_website.outline = '首页访问检测到异常'
                            monitor_website.level = '高'
                            monitor_website.pageview = '-'
                            monitor_website.snapshot = SnapshotService.simulation_404(
                                domain_name)
                            monitor_website_dao.add(monitor_website)
                    else:
                        monitor_website.access = '异常'
                        monitor_website.is_normal = '异常'
                        monitor_website.outline = '首页访问检测到异常'
                        monitor_website.level = '高'
                        monitor_website.pageview = '-'
                        if current_url is None:
                            logger.info("snapshot 404")
                            monitor_website.snapshot = SnapshotService.simulation_404(
                                domain_name)
                        else:
                            chrome_driver = WebDriver.get_chrome()
                            try:
                                chrome_driver.get(current_url)
                                monitor_website.snapshot = SnapshotService.create_snapshot(
                                    chrome_driver, batch_num, website, '网站')
                            except Exception as e:
                                logger.error(e)
                                if "timeout" in str(e):
                                    logger.info("访问超时")
                                    monitor_website.outline = '访问超时'
                                else:
                                    monitor_website.outline = str(e)
                                monitor_website.snapshot = SnapshotService.simulation_404(
                                    current_url)
                                monitor_website_dao.add(monitor_website)
                                # NOTE(review): this is the only path returning a
                                # tuple; every other path returns None.
                                return None, None
                            finally:
                                chrome_driver.quit()
                        logger.info("website is not available : %s return!",
                                    domain_name)
                        monitor_website_dao.add(monitor_website)
                        # NOTE(review): remaining domains in the list are not
                        # inspected once one domain is unavailable.
                        return
                except Exception as e:
                    logger.info(e)
                    monitor_website.access = '异常'
                    monitor_website.is_normal = '异常'
                    monitor_website.outline = '巡检系统异常,建议手动重试!'
                    monitor_website.level = '高'
                    monitor_website_dao.add(monitor_website)
        finally:
            # Quit only after every domain was handled (or on early return);
            # quitting inside the loop would leave later domains with a dead
            # browser session.
            driver.quit()
    def monitor_website(weburl, batch_num):
        """Inspect one page URL: dead-link check, screenshot and content checks.

        The URL is probed with ``AccessibleService.get_access_res`` and, if
        that fails, retried with ``get_proxy_access_res``. An unreachable URL
        is stored as a dead link. A reachable URL is opened in Chrome (the
        proxy variant when the proxy was needed), a snapshot is taken, and the
        visible page text is scanned for the configured keywords, for "充值"
        and for the misleading-guarantee phrases. Each finding is stored
        through ``MonitorWeburlDao.add``.

        Content checks are substring tests on ``BeautifulSoup.get_text()``.
        ``soup.find(word)`` is not used because its first argument filters by
        tag name, so it would never match text on the page.

        Args:
            weburl: Source record; ``website_name``, ``domain_name``,
                ``merchant_name``, ``merchant_num``, ``saler``, ``url`` and
                ``title`` are read.
            batch_num: Batch identifier copied onto the stored record and
                passed to ``SnapshotService.snapshot_weburl``.

        Returns:
            None. Exceptions raised while the page is open are logged and
            swallowed.
        """
        # Content monitoring
        keyword_dao = KeywordDao()
        keywords = keyword_dao.get_all()
        access = AccessibleService()

        monitor_weburl_dao = MonitorWeburlDao()
        monitor_weburl = MonitorUrl()
        monitor_weburl.website_name = weburl.website_name
        monitor_weburl.domain_name = weburl.domain_name
        monitor_weburl.merchant_name = weburl.merchant_name
        monitor_weburl.merchant_num = weburl.merchant_num
        monitor_weburl.saler = weburl.saler
        monitor_weburl.url = weburl.url
        monitor_weburl.batch_num = batch_num
        monitor_weburl.title = weburl.title
        # Dead-link detection: try directly first, then through the proxy.
        reachable, _ = access.get_access_res(weburl.url)
        use_proxy = False
        if reachable is None:
            logger.info("使用代理重试访问: %s", weburl.url)
            reachable, _ = access.get_proxy_access_res(weburl.url)
            use_proxy = True
        else:
            logger.info("不使用代理可以访问: %s", weburl.url)
        if reachable is None:
            logger.info("检测到误404 : %s", weburl.url)
            monitor_weburl.outline = '检测到误404'
            monitor_weburl.is_normal = '异常'
            monitor_weburl.level = '高'
            monitor_weburl.snapshot = SnapshotService.simulation_404(weburl.url)
            monitor_weburl.kinds = '死链接'
            monitor_weburl_dao.add(monitor_weburl)
            return
        logger.info("url可以访问: %s", weburl.url)
        # Take a screenshot with the same access mode that reached the URL.
        if use_proxy:
            driver = WebDriver.get_proxy_chrome()
        else:
            driver = WebDriver.get_chrome()
        try:
            driver.get(weburl.url)
            snapshot = SnapshotService.snapshot_weburl(driver, batch_num,
                                                       weburl, '网站内容')
            logger.info("snapshot: %s", snapshot)
            logger.info("monitor_weburl: %s", monitor_weburl)
            monitor_weburl.outline = '网页打开正常'
            monitor_weburl.is_normal = '正常'
            monitor_weburl.level = '-'
            monitor_weburl.snapshot = snapshot
            monitor_weburl.kinds = '是否能打开'
            monitor_weburl_dao.add(monitor_weburl)

            source = driver.page_source
            soup = BeautifulSoup(source, 'html.parser')
            page_text = soup.get_text()
            # Sensitive-keyword detection
            for keyword in keywords:
                if not keyword.name:
                    # An empty keyword is a substring of every page; skip it.
                    continue
                word = str(keyword.name)
                if word in page_text:
                    logger.info("senti url alert,there is : %s", word)
                    monitor_weburl.outline = '检测到敏感词:' + word
                    monitor_weburl.is_normal = '异常'
                    monitor_weburl.level = '低'
                    monitor_weburl.snapshot = snapshot
                    monitor_weburl.kinds = '命中敏感词'

                    monitor_weburl_dao.add(monitor_weburl)
            # Detect recharge / withdrawal / wallet features on a
            # non-financial platform (only "充值" is checked).
            if "充值" in page_text:
                logger.info("senti url alert,there is : %s", str("充值"))
                monitor_weburl.outline = '检测到包含充值、提现、钱包功能'
                monitor_weburl.is_normal = '异常'
                monitor_weburl.level = '低'
                monitor_weburl.snapshot = snapshot
                monitor_weburl.kinds = '非法功能'

                monitor_weburl_dao.add(monitor_weburl)
            # Detect misleading advertising
            if "融宝资金担保" in page_text or "融宝托管" in page_text:
                monitor_weburl.outline = '检测到误导宣传'
                monitor_weburl.is_normal = '异常'
                monitor_weburl.level = '中'
                monitor_weburl.snapshot = snapshot
                monitor_weburl.kinds = '误导宣传'

                monitor_weburl_dao.add(monitor_weburl)
        except Exception as e:
            logger.error(e)
            return
        finally:
            driver.quit()
    def monitor_website(weburl, batch_num):
        """Open one page URL in PhantomJS, snapshot it and run content checks.

        ``weburl.url`` gets an ``http://`` prefix when it does not start with
        ``http`` (the attribute on ``weburl`` itself is updated). The page is
        opened, a snapshot is taken and an "opens normally" record is stored;
        a page whose source is the empty HTML skeleton is stored as abnormal.
        The visible page text is then scanned for the configured keywords, for
        "充值" and for the misleading-guarantee phrases, storing one record per
        finding through ``MonitorWeburlDao.add``.

        Content checks are substring tests on ``BeautifulSoup.get_text()``.
        ``soup.find(word)`` is not used because its first argument filters by
        tag name, so it would never match text on the page.

        If anything in that flow raises, the URL is stored as a dead link with
        a simulated 404 snapshot.

        Args:
            weburl: Source record; ``website_name``, ``domain_name``,
                ``merchant_name``, ``merchant_num``, ``saler``, ``url`` and
                ``title`` are read, and ``url`` may be rewritten.
            batch_num: Batch identifier copied onto the stored record and
                passed to ``SnapshotService.snapshot_weburl``.

        Raises:
            Exception: re-raised when storing the dead-link record fails,
                after rolling back and closing a ``DB_Session``.
        """
        # Content monitoring
        keyword_dao = KeywordDao()
        keywords = keyword_dao.get_all()

        monitor_weburl_dao = MonitorWeburlDao()
        monitor_weburl = MonitorUrl()
        monitor_weburl.website_name = weburl.website_name
        monitor_weburl.domain_name = weburl.domain_name
        monitor_weburl.merchant_name = weburl.merchant_name
        monitor_weburl.merchant_num = weburl.merchant_num
        monitor_weburl.saler = weburl.saler
        monitor_weburl.url = weburl.url
        monitor_weburl.batch_num = batch_num
        monitor_weburl.title = weburl.title
        driver = WebDriver.get_phantomjs()
        try:
            logger.info("monitor_url: %s", weburl.url)
            if not str(weburl.url).startswith("http"):
                weburl.url = "http://" + weburl.url
            logger.info("weburl.url: %s", weburl)
            logger.info("weburl.url: %s", weburl.url)
            driver.get(weburl.url)
            snapshot = SnapshotService.snapshot_weburl(driver, batch_num,
                                                       weburl, '网站内容')
            logger.info("snapshot: %s", snapshot)
            monitor_weburl.outline = ''
            monitor_weburl.is_normal = '正常'
            monitor_weburl.level = '-'
            monitor_weburl.snapshot = snapshot
            monitor_weburl.kinds = '是否能打开'
            logger.info("monitor_url: add %s", weburl.url)
            source = driver.page_source
            # An empty HTML skeleton means the browser rendered nothing.
            if source == '<html><head></head><body></body></html>':
                monitor_weburl.outline = '网页打开异常'
                monitor_weburl.is_normal = '异常'
            monitor_weburl_dao.add(monitor_weburl)
            soup = BeautifulSoup(source, 'html.parser')
            page_text = soup.get_text()
            # Sensitive-keyword detection
            for keyword in keywords:
                if not keyword.name:
                    # An empty keyword is a substring of every page; skip it.
                    continue
                word = str(keyword.name)
                if word in page_text:
                    logger.info(
                        "senti url alert,there is [ %s] in the url page!",
                        word)
                    monitor_weburl.outline = '检测到敏感词:' + word
                    monitor_weburl.is_normal = '异常'
                    monitor_weburl.level = '低'
                    monitor_weburl.snapshot = snapshot
                    monitor_weburl.kinds = '命中敏感词'
                    monitor_weburl_dao.add(monitor_weburl)
            # Detect recharge / withdrawal / wallet features on a
            # non-financial platform (only "充值" is checked).
            if "充值" in page_text:
                logger.info("senti url alert,there is  [ %s] in the url page!",
                            str("充值"))
                monitor_weburl.outline = '检测到包含充值、提现、钱包功能'
                monitor_weburl.is_normal = '异常'
                monitor_weburl.level = '低'
                monitor_weburl.snapshot = snapshot
                monitor_weburl.kinds = '非法功能'
                monitor_weburl_dao.add(monitor_weburl)
            # Detect misleading advertising
            if "融宝资金担保" in page_text or "融宝托管" in page_text:
                monitor_weburl.outline = '检测到误导宣传'
                monitor_weburl.is_normal = '异常'
                monitor_weburl.level = '中'
                monitor_weburl.snapshot = snapshot
                monitor_weburl.kinds = '误导宣传'
                monitor_weburl_dao.add(monitor_weburl)
        except Exception as e:
            # ERROR No transaction is begun.
            logger.error(e)
            conn = DB_Session()
            try:
                logger.info("检测到误404 : %s", weburl.url)
                monitor_weburl.outline = '检测到页面404'
                monitor_weburl.is_normal = '异常'
                monitor_weburl.level = '高'
                monitor_weburl.snapshot = SnapshotService.simulation_404(
                    weburl.url)
                monitor_weburl.kinds = '死链接'
                logger.info("monitor_url:Exception %s", weburl.url)
                monitor_weburl_dao.add(monitor_weburl)
            except Exception as e:
                logger.info(e)
                conn.rollback()
                raise
            finally:
                conn.close()
        finally:
            driver.quit()
 def monitor_website(website, batch_num):
     """Inspect every home page of ``website`` and store one result per domain.

     ``website.domain_name`` is split on commas. For each entry the host is
     first requested over ``http.client.HTTPSConnection``; only a 200 status
     leads to a PhantomJS visit, where the final URL, title and page source
     decide the outline stored through ``MonitorWebsiteDao.add``. An empty
     (or ``None``) ``domain_name`` stores a single record and probes nothing.

     The same ``MonitorWebsite`` object is reused for all domains, so its
     status fields are reset at the start of each iteration; otherwise a
     healthy domain would leave '正常' on the record of a later failing one.

     Args:
         website: Source record; ``website_name``, ``merchant_name``,
             ``merchant_num``, ``domain_name`` and ``saler`` are read.
         batch_num: Batch identifier copied onto the record and passed to
             ``SnapshotService.create_snapshot``.

     Returns:
         None.
     """
     monitor_website_dao = MonitorWebsiteDao
     empty_page = "<html><head></head><body></body></html>"
     # Ordered (marker, outline) pairs; the first marker found in the page
     # source decides the outline.
     source_markers = (
         ('ERR_CONNECTION_REFUSED', "疑似异常,检测到 ERR_CONNECTION_REFUSED"),
         ('ERR_CONNECTION_TIMED_OUT', "疑似异常,检测到 ERR_CONNECTION_TIMED_OUT"),
         ('ERR_NAME_RESOLUTION_FAILED', "疑似异常,检测到 ERR_NAME_RESOLUTION_FAILED"),
         ('DNS_PROBE_FINISHED_NXDOMAIN', "疑似异常,检测到 DNS_PROBE_FINISHED_NXDOMAIN"),
         ('ERR_EMPTY_RESPONSE', "疑似异常,检测到 ERR_EMPTY_RESPONSE"),
         ('主机开设成功', "疑似异常,检测到类似网站在建信息"),
         ('非法阻断', "疑似异常,检测到非法阻断"),
         ('Bad Request', "疑似异常,检测到 Bad Request"),
         ('404 page not found', "疑似异常,检测到 404 page not found"),
         ('https://wanwang.aliyun.com/domain/parking', "疑似异常,检测到阻断拦截"),
         ('没有找到站点', "疑似异常,没有找到站点"),
         ('未备案提示', "疑似异常,未备案提示"),
     )
     monitor_website = MonitorWebsite()
     monitor_website.website_name = website.website_name
     monitor_website.merchant_name = website.merchant_name
     monitor_website.merchant_num = website.merchant_num
     monitor_website.domain_name = website.domain_name
     monitor_website.saler = website.saler
     monitor_website.batch_num = batch_num
     monitor_website.kinds = "首页是否可打开"
     monitor_website.level = '高'
     monitor_website.access = '异常'
     monitor_website.is_normal = '异常'
     monitor_website.pageview = '-'
     if not website.domain_name:
         logger.info("website_domain is None! merchant_name: %s ",
                     website.merchant_name)
         monitor_website.outline = '商户网址为空。'
         monitor_website_dao.add(monitor_website)
         return
     logger.info("domain_name is %s! Go to inspect... ",
                 website.domain_name)
     # Home-page monitoring
     for domain_name in str(website.domain_name).split(","):
         logger.info("-------------------")
         # Start every domain from the "abnormal" defaults.
         monitor_website.level = '高'
         monitor_website.access = '异常'
         monitor_website.is_normal = '异常'
         # Test for a real scheme prefix so hosts that merely begin with
         # "http" (e.g. "httpbin.org") are not truncated.
         if domain_name.startswith(("http://", "https://")):
             domain_name_rich = domain_name
             bare_url = domain_name.split("://", 1)[1]
             logger.info("domain with out http::  %s", bare_url)
         else:
             bare_url = domain_name
             domain_name_rich = "http://" + domain_name
         # Host part only: everything before the first path separator.
         dns = bare_url.split("/", 1)[0]
         try:
             logger.info("dns:  %s", dns)
             conn = http.client.HTTPSConnection(dns, timeout=10)
             try:
                 conn.request('GET', domain_name_rich)
                 code = conn.getresponse().status
             finally:
                 # Only the status is needed; release the socket right away.
                 conn.close()
             logger.info("code:  %s", code)
             if code != 200:
                 logger.info("确定无法访问!")
                 monitor_website.outline = '检测到网站异常'
                 monitor_website.snapshot = SnapshotService.simulation_404(
                     domain_name_rich)
                 monitor_website_dao.add(monitor_website)
                 continue
             logger.info("使用webdriver进行截图:  %s ... ", domain_name_rich)
             driver = None
             try:
                 driver = WebDriver.get_phantomjs()
                 driver.get(domain_name_rich)
                 current_url = driver.current_url
                 title = driver.title
                 source = driver.page_source
                 snapshot = SnapshotService.create_snapshot(
                     driver, batch_num, website, '网站')
                 logger.info("title:  %s", title)
                 logger.info("current_url:  %s", current_url)
                 nothing_rendered = (str(source) == empty_page
                                     and str(title) == "")
                 if str(current_url) == "about:blank" and nothing_rendered:
                     logger.info("检测到about:blank :  %s", current_url)
                     monitor_website.outline = "网站疑似无法访问"
                     monitor_website.snapshot = SnapshotService.simulation_404(
                         domain_name_rich)
                     monitor_website_dao.add(monitor_website)
                     continue
                 # Substring test instead of str.index(), which raises
                 # ValueError rather than returning -1 when nothing matches.
                 if bare_url not in str(current_url):
                     logger.info("疑似跳转...:  %s", current_url)
                     monitor_website.outline = "疑似跳转,检测到首页地址为:" + current_url
                     monitor_website.snapshot = snapshot
                     monitor_website_dao.add(monitor_website)
                     continue
                 monitor_website.snapshot = snapshot
                 logger.info("check title和source...")
                 if '404' in title:
                     monitor_website.outline = "疑似异常,检测到404"
                 elif 'ERR_NAME_NOT_RESOLVED' in source:
                     monitor_website.outline = "疑似异常,Title信息:" + title
                 else:
                     matched = None
                     for marker, outline in source_markers:
                         if marker in source:
                             matched = outline
                             break
                     if matched is not None:
                         monitor_website.outline = matched
                     elif nothing_rendered:
                         monitor_website.snapshot = SnapshotService.simulation_404(
                             domain_name_rich)
                         monitor_website.outline = "疑似无法访问"
                     else:
                         monitor_website.outline = '检测正常'
                         monitor_website.access = '正常'
                         monitor_website.is_normal = '正常'
                         monitor_website.level = '-'
                 logger.info("outline:  %s", monitor_website.outline)
                 monitor_website_dao.add(monitor_website)
             except Exception as e:
                 logger.error(e)
                 monitor_website.snapshot = SnapshotService.simulation_404(
                     domain_name_rich)
                 monitor_website.outline = '访问超时,可能被目标网站屏蔽,建议手动验证!'
                 monitor_website_dao.add(monitor_website)
             finally:
                 # Single quit point; also covers the `continue` paths above
                 # and the case where the driver could not be created.
                 if driver is not None:
                     driver.quit()
         except Exception as e:
             logger.error(e)
             logger.info("urlopen 无法打开页面..")
             monitor_website.outline = 'urlopen无法打开网站。'
             monitor_website.snapshot = SnapshotService.simulation_404(
                 domain_name_rich)
             monitor_website_dao.add(monitor_website)