Esempio n. 1
0
def dark_start(target):
    """Run one hidden-link detection pass against ``target``.

    Selects the logger output plugin from the profile (file or console),
    runs the detector (``init`` / ``run`` / ``finsh``) and then disposes of
    the database manager and ends logging.

    The cleanup runs in a ``finally`` clause, so it also happens when the
    detection raises; the exception itself still propagates to the caller.

    :param target: URL of the page to scan. When file logging is enabled its
        root domain becomes part of the log file name.
    """
    # Imports are kept function-local, exactly as in the original code.
    from dark_core.database.mysqlManger import sqlMg
    from hiddenDetect import hiddenlink_obj
    from dark_core.output.console import consoleLog
    from dark_core.output.textFile import fileLog
    from dark_core.output.logging import logger
    from dark_core.profile.profile import pf
    from dark_core.settings.settings import settings
    from dark_core.parser.urlParser import url_object
    from datetime import datetime

    # Configure the logging module.
    if pf.getLogType() == 'True':
        file_path = settings.get('LOG_FILE_PATH')
        datetimestrf = datetime.now().strftime('%Y-%m-%d')
        domain = url_object(target).getRootDomain  # root domain of the current page
        # The log file is named <domain>_<date>.log
        file_name = file_path + domain + '_' + datetimestrf + '.log'
        fileLog.set_file_name(file_name)
        logger.setOutputPlugin(fileLog)
    else:
        logger.setOutputPlugin(consoleLog)

    try:
        # Run the detection.
        hidden = hiddenlink_obj(target)
        hidden.init()
        hidden.run()
        hidden.finsh()
    finally:
        # Close the database connections and the logging module even if the
        # detection failed part-way through.
        sqlMg.dispose()
        logger.endLogging()
Esempio n. 2
0
    def __init__(self, profname='', workdir=None):
        '''
        Constructor.

        Calls create_home_dir(), builds a ConfigParser that keeps option
        names as written and, when profname is given, resolves it with
        _getRealProfileName() and loads that file into the parser.

        @:parameter profname: File name (may include a path).
        @:parameter workdir: Working directory name.
        @raise DarkException: If reading the opened profile into the parser
                              fails.
        '''
        create_home_dir()
        # The default optionxform lower-cases option names; we need them
        # unchanged.
        self._config = ConfigParser.ConfigParser()
        self._config.optionxform = lambda opt: opt

        if profname:
            profname = self._getRealProfileName(profname, workdir)
            with codecs.open(profname, "rb",
                             settings.get('DEFAULT_ENCODING')) as fp:
                try:
                    self._config.readfp(fp)
                except ConfigParser.Error as cpe:
                    # The placeholders must be %(name)s: with a dict on the
                    # right-hand side, a bare %s raised TypeError and hid
                    # the real error.
                    raise DarkException(
                        _('ConfigParser error in profile: "%(profname)s". Exception: "%(exception)s"') %
                        {'profname': profname, 'exception': str(cpe)})
                except Exception as e:
                    raise DarkException(
                        _('Unknown error in profile: "%(profname)s". Exception: "%(exception)s"') %
                        {'profname': profname, 'exception': str(e)})
Esempio n. 3
0
def dark_start(target):
    """Run one hidden-link detection pass against ``target``.

    Selects the logger output plugin from the profile (file or console),
    runs the detector (``init`` / ``run`` / ``finsh``) and then disposes of
    the database manager and ends logging.

    The cleanup runs in a ``finally`` clause, so it also happens when the
    detection raises; the exception itself still propagates to the caller.

    :param target: URL of the page to scan. When file logging is enabled its
        root domain becomes part of the log file name.
    """
    # Imports are kept function-local, exactly as in the original code.
    from dark_core.database.mysqlManger import sqlMg
    from hiddenDetect import hiddenlink_obj
    from dark_core.output.console import consoleLog
    from dark_core.output.textFile import fileLog
    from dark_core.output.logging import logger
    from dark_core.profile.profile import pf
    from dark_core.settings.settings import settings
    from dark_core.parser.urlParser import url_object
    from datetime import datetime

    # Configure the logging module.
    if pf.getLogType() == "True":
        file_path = settings.get("LOG_FILE_PATH")
        datetimestrf = datetime.now().strftime("%Y-%m-%d")
        domain = url_object(target).getRootDomain  # root domain of the current page
        # The log file is named <domain>_<date>.log
        file_name = file_path + domain + "_" + datetimestrf + ".log"
        fileLog.set_file_name(file_name)
        logger.setOutputPlugin(fileLog)
    else:
        logger.setOutputPlugin(consoleLog)

    try:
        # Run the detection.
        hidden = hiddenlink_obj(target)
        hidden.init()
        hidden.run()
        hidden.finsh()
    finally:
        # Close the database connections and the logging module even if the
        # detection failed part-way through.
        sqlMg.dispose()
        logger.endLogging()
Esempio n. 4
0
    def __init__(self, url):
        '''
        Remember the page URL and work out where its snapshot file lives.

        @parameter url: URL of the page the snapshot is taken for.
        '''
        self.url = url
        page_url = self.url

        # '/' is replaced by '_' so the URL can be used inside a file name.
        self.target = page_url.replace('/', '_')
        self.root_target = url_object(page_url).getDomain
        self.snapshot_path = settings.get('SNAPSHOT_PATH')

        # e.g. /tmp/www.kingboxs.com_aaa_snapshot.html
        snapshot_name = self.target + '_snapshot.html'
        self._file_name = snapshot_name
        self._file_path = os.path.join(self.snapshot_path, snapshot_name)

        # Initialisation flag.
        self._initialized = False
Esempio n. 5
0
    def __init__(self, url):
        '''
        Store the page URL and derive the snapshot file name and path.

        @parameter url: URL of the page the snapshot is taken for.
        '''
        self.url = url
        page_url = self.url

        # Every '/' becomes '_' so the URL fits into a file name.
        flattened = page_url.replace('/', '_')
        self.target = flattened
        self.root_target = url_object(page_url).getDomain
        self.snapshot_path = settings.get('SNAPSHOT_PATH')

        self._file_name = self.target + '_snapshot.html'
        # e.g. /tmp/www.kingboxs.com_aaa_snapshot.html
        self._file_path = os.path.join(self.snapshot_path, self._file_name)

        self._initialized = False  # initialisation flag
Esempio n. 6
0
    def __init__(self, obj):
        """
        Prepare the paths used to build the HTML report for ``obj``.

        @parameter obj: Component the report is generated for; its ``url``
                        attribute is read here.
        """
        self.obj = obj
        self.target = url_object(self.obj.url).getDomain
        # Directory that contains this source file.
        module_dir = os.path.dirname(os.path.realpath(__file__))
        # Root directory the reports are written to.
        self.reportPath = settings.get("REPORT_PATH")

        self._initialized = False  # initialisation flag

        self._html_filepath = module_dir + os.path.sep
        # Location of the CSS file that gets loaded.
        self._style_main_filename = self._html_filepath + "main.css"
        # Holds the file pointer of the report.
        self._file = None

        # Report file name: <domain>_<date>_a.html
        date_stamp = datetime.now().strftime("%Y-%m-%d")
        self._file_name = "_".join((self.target, date_stamp, "a.html"))
        self._file_path = os.path.join(self.reportPath, self._file_name)
Esempio n. 7
0
    def __init__(self, obj):
        '''
        Set up the report writer for ``obj`` and compute its file locations.

        @parameter obj: Component the report is generated for; its ``url``
                        attribute is read here.
        '''
        self.obj = obj
        self.target = url_object(self.obj.url).getDomain
        # Directory this source file lives in.
        here = os.path.dirname(os.path.realpath(__file__))
        # Root directory for generated reports.
        self.reportPath = settings.get('REPORT_PATH')

        # Initialisation flag.
        self._initialized = False

        self._html_filepath = here + os.path.sep
        # Path of the CSS file to load.
        self._style_main_filename = self._html_filepath + 'main.css'
        # File pointer of the report, none yet.
        self._file = None

        # The report is named <domain>_<date>_a.html
        today = datetime.now().strftime('%Y-%m-%d')
        self._file_name = '_'.join((self.target, today, 'a.html'))
        self._file_path = os.path.join(self.reportPath, self._file_name)
Esempio n. 8
0
    def __init__(self, profname='', workdir=None):
        '''
        Constructor.

        Calls create_home_dir(), builds a ConfigParser that keeps option
        names as written and, when profname is given, resolves it with
        _getRealProfileName() and loads that file into the parser.

        @:parameter profname: File name (may include a path).
        @:parameter workdir: Working directory name.
        @raise DarkException: If reading the opened profile into the parser
                              fails.
        '''
        create_home_dir()
        # The default optionxform lower-cases option names; we need them
        # unchanged.
        self._config = ConfigParser.ConfigParser()
        self._config.optionxform = lambda opt: opt

        if profname:
            profname = self._getRealProfileName(profname, workdir)
            with codecs.open(profname, "rb", settings.get('DEFAULT_ENCODING')) as fp:
                try:
                    self._config.readfp(fp)
                except ConfigParser.Error as cpe:
                    # The placeholders must be %(name)s: with a dict on the
                    # right-hand side, a bare %s raised TypeError and hid
                    # the real error.
                    raise DarkException(
                        _('ConfigParser error in profile: "%(profname)s". Exception: "%(exception)s"') %
                        {'profname': profname, 'exception': str(cpe)})
                except Exception as e:
                    raise DarkException(
                        _('Unknown error in profile: "%(profname)s". Exception: "%(exception)s"') %
                        {'profname': profname, 'exception': str(e)})
Esempio n. 9
0
    def genHtmlReport(self):
        """
        This method is called when the scan has finished.

        Reads the results from ``self.obj`` (``resultHiddenlink``,
        ``strStartTime``, ``interval``) and writes the wrapper, header,
        verdict and per-page finding sections of the HTML report through
        ``self._write_to_file``.

        The report body is only written when ``self._initialized`` is false;
        in that case ``self._init()`` is called first.

        Page URLs, link URLs and the link content / level / type values are
        HTML-escaped before being embedded, so markup taken from a scanned
        page is shown as text instead of being interpreted by the browser
        that opens the report.

        @return: None when an exception occurs while writing; the error is
                 logged and ``self._file`` is closed.
        """
        # NOTE(review): no return statement for the success path is visible
        # in this part of the method.
        from cgi import escape  # stdlib; local import keeps the method self-contained

        def _esc(value):
            # Convert the value the same way a u"%s" template would, then
            # escape &, <, > and double quotes.
            return escape(u"%s" % (value,), True)

        # Just in case...
        if not self._initialized:
            self._init()
            try:
                malwebRes = self.obj.resultHiddenlink

                starttime = self.obj.strStartTime
                interval = self.obj.interval

                rname = u"恶意内容扫描报告"

                div_wrapper_str = u'<div id="wrapper">\n' u'\t<div class="tt">\n' u"\t\t<p>深信服安全中心云扫描服务</p></div>\n"
                self._write_to_file(div_wrapper_str)

                div_header_str = (
                    u'\t<div id="header">\n'
                    u"\t\t<h1>WEB应用%s</h1>\n"
                    u"\t\t<ul>\n"
                    u"\t\t\t<li>目标网站:%s</li>\n"
                    u"\t\t\t<li>开始时间:%s</li>\n"
                    u"\t\t\t<li>扫描时长:%s</li>\n"
                    u"\t\t</ul>\n" % (rname, self.target, starttime, interval)
                )
                self._write_to_file(div_header_str)

                # True when at least one page with findings was recorded.
                has_findings = len(malwebRes) != 0

                # Verdict paragraph.
                if has_findings:
                    div_tc_str = (
                        u'\t\t<div class="tc">\n'
                        u"\t\t\t<h2>经检测:</h2>\n"
                        u"\t\t\t<p>发现该网站存在恶意网页(暗链|挂马|webshell),证明该网站已经遭到入侵。请尽快删除恶意网页或内容。</p>\n"
                        u"\t\t</div>\n"
                    )
                else:
                    div_tc_str = (
                        u'\t\t<div class="tc">\n'
                        u"\t\t\t<h2>经检测:</h2>\n"
                        u"\t\t\t<p>没有发现该网站存在恶意内容。</p>\n"
                        u"\t\t</div>\n"
                    )
                self._write_to_file(div_tc_str)

                div_end_str = u"\t</div>\n"
                self._write_to_file(div_end_str)

                # Target site information.
                div_body_str = (
                    u'\t<div class="body">\n'
                    u"\t\t<!-- 重复性模块:开始 -->\n"
                    u'\t\t<div class="m">\n'
                    u'\t\t\t<div class="m-h">\n'
                    u"\t\t\t\t<h2>恶意网页检测结果</h2>\n"
                    u"\t\t\t</div>\n"
                    u'\t\t\t<div class="m-b">\n'
                )
                self._write_to_file(div_body_str)

                if has_findings:
                    hdurl_num = len(malwebRes)  # number of pages with hidden links
                    div_c_str = (
                        u'\t\t\t\t<div class="c">\n'
                        u'\t\t\t\t\t<dl class="c-t">\n'
                        u"\t\t\t\t\t\t<dt><strong>暗链(%d)</strong></dt>\n"
                        u"\t\t\t\t\t\t<dd> <strong>描述:</strong><br/>\n"
                        u"\t\t\t\t\t\t\t暗链是指攻击者通过各种攻击手段向网站的正常网页中植入视觉上令人难以察觉的链接,这些链接往往是网游私服、医疗、博彩、色情,甚至是反动网站的网站链接。<br/>\n"
                        u"\t\t\t\t\t\t\t<strong>修复建议:</strong><br/>\n"
                        u"\t\t\t\t\t\t\t删除暗链代码,同时修复网站漏洞防止再次被植入暗链。\n"
                        u"\t\t\t\t\t\t</dd>\n"
                        u"\t\t\t\t\t</dl>\n"
                        u'\t\t\t\t\t<div class="c-l">\n' % hdurl_num
                    )
                    self._write_to_file(div_c_str)

                    # hd_url: page that contains hidden links.
                    # hd_set: the links found on it, {url: (content, level, type)}.
                    for k, (hd_url, hd_set) in enumerate(malwebRes.items(), 1):
                        link_items = []
                        for (include_url, include_property) in hd_set.items():
                            content = include_property[0]
                            level = include_property[1]
                            link_type = include_property[2]
                            link_items.append(
                                u"\t\t\t\t\t\t\t\t\t<li>链接:%s\t内容:%s\t等级:%s\t类型:%s</li>\n" % (
                                    _esc(include_url),
                                    _esc(content),
                                    _esc(level),
                                    _esc(link_type),
                                )
                            )
                        links_html = u"".join(link_items)
                        page_url = _esc(hd_url)

                        div_c_item_str = (
                            u'\t\t\t\t\t\t<div class="c-item">\n'
                            u'\t\t\t\t\t\t\t<div class="c-h">\n'
                            u"\t\t\t\t\t\t\t\t<h3>恶意网页%d/%d</h3>\n"
                            u"\t\t\t\t\t\t\t</div>\n"
                            u'\t\t\t\t\t\t\t<div class="c-b">\n'
                            u"\t\t\t\t\t\t\t\t<dl>\n"
                            u"\t\t\t\t\t\t\t\t\t<dt>页面URL:</dt>\n"
                            u'\t\t\t\t\t\t\t\t\t<dd><a href="%s">发现"%s"存在暗链</a></dd>\n'
                            u"\t\t\t\t\t\t\t\t</dl>\n"
                            u"\t\t\t\t\t\t\t\t<dl>\n"
                            u"\t\t\t\t\t\t\t\t\t<dt>严重等级:</dt>\n"
                            u'\t\t\t\t\t\t\t\t\t<dd><strong class="high">%s</strong>\n'
                            u"\t\t\t\t\t\t\t\t</dl>\n"
                            u"\t\t\t\t\t\t\t\t<dl>\n"
                            u"\t\t\t\t\t\t\t\t\t<dt>恶意内容:</dt>\n"
                            u"\t\t\t\t\t\t\t\t\t<dd></dd>\n"
                            u"\t\t\t\t\t\t\t\t</dl>\n"
                            u'\t\t\t\t\t\t\t\t<code class="quote">\n'
                            u"%s"
                            u"\t\t\t\t\t\t\t\t</code>\n"
                            u"\t\t\t\t\t\t\t</div>\n"
                            u"\t\t\t\t\t\t</div>\n"
                            % (k, hdurl_num, page_url, page_url, settings.get("THREAT_LEVEL"), links_html)
                        )
                        self._write_to_file(div_c_item_str)
                    div_end_str = u"\t\t\t\t\t</div>\n" u"\t\t\t\t</div>\n" u"\t\t\t</div>\n"

                    self._write_to_file(div_end_str)

                else:
                    div_c_str = u"\t\t\t\t未发现任何恶意网页!\n"
                    self._write_to_file(div_c_str)
                    div_end_str = u"\t\t\t</div>\n"
                    self._write_to_file(div_end_str)

                div_end_str = u"\t\t</div>\n" u"\t</div>\n"
                self._write_to_file(div_end_str)

                html_end_str = u"</div>\n</body>\n</html>\n"
                self._write_to_file(html_end_str)

            except Exception as e:
                logger.error("Why this happen, report will return none:%s" % e)
                self._file.close()
                return None
Esempio n. 10
0
        try:
            main_style_file = open(self._style_main_filename, "r")
        except:
            raise DarkException, _("Cant open style file " + self._html_filepath + ".")
        else:
            doctype_str = (
                u'<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'
                u'"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">\n'
                u'<html xmlns="http://www.w3.org/1999/xhtml">\n'
            )
            self._write_to_file(doctype_str)
            head_str = (
                u"<head>\n"
                u"\t<title>%s</title>\n"
                u'\t<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">\n'
                u'\t<STYLE TYPE="text/css">\n' % unicode(settings.get("HTML_TITLE"))
            )
            self._write_to_file(head_str)
            self._write_to_file(main_style_file.read())
            headend_str = u"\n\t</style>\n</head>\n"
            self._write_to_file(headend_str)
            main_style_file.close()
            body_str = u"<body>\n"
            self._write_to_file(body_str)

    def _write_to_file(self, msg):
        """
        Write msg to the file.

        @parameter msg: The message string.
        """
Esempio n. 11
0
 def __init__(self):
     '''Create the Browser selected by the BROWSER_TYPE setting and log it.'''
     browser_type = settings.get('BROWSER_TYPE')
     self.browser = Browser(browser_type)
     logger.info('Splinter browser is init!')
Esempio n. 12
0
    def genHtmlReport(self):
        '''
        This method is called when the scan has finished.

        Reads the results from ``self.obj`` (``resultHiddenlink``,
        ``strStartTime``, ``interval``) and writes the wrapper, header,
        verdict and per-page finding sections of the HTML report through
        ``self._write_to_file``.

        The report body is only written when ``self._initialized`` is false;
        in that case ``self._init()`` is called first.

        Page URLs, link URLs and the link content / level / type values are
        HTML-escaped before being embedded, so markup taken from a scanned
        page is shown as text instead of being interpreted by the browser
        that opens the report.

        @return: None when an exception occurs while writing; the error is
                 logged and ``self._file`` is closed.
        '''
        # NOTE(review): no return statement for the success path is visible
        # in this part of the method.
        from cgi import escape  # stdlib; local import keeps the method self-contained

        def _esc(value):
            # Convert the value the same way a u'%s' template would, then
            # escape &, <, > and double quotes.
            return escape(u'%s' % (value,), True)

        # Just in case...
        if not self._initialized:
            self._init()
            try:
                malwebRes = self.obj.resultHiddenlink

                starttime = self.obj.strStartTime
                interval = self.obj.interval

                rname = u'恶意内容扫描报告'

                div_wrapper_str = u'<div id="wrapper">\n' \
                                  u'\t<div class="tt">\n' \
                                  u'\t\t<p>深信服安全中心云扫描服务</p></div>\n'
                self._write_to_file(div_wrapper_str)

                div_header_str = u'\t<div id="header">\n' \
                                 u'\t\t<h1>WEB应用%s</h1>\n' \
                                 u'\t\t<ul>\n' \
                                 u'\t\t\t<li>目标网站:%s</li>\n' \
                                 u'\t\t\t<li>开始时间:%s</li>\n' \
                                 u'\t\t\t<li>扫描时长:%s</li>\n' \
                                 u'\t\t</ul>\n' % (rname, self.target, starttime, interval)
                self._write_to_file(div_header_str)

                # True when at least one page with findings was recorded.
                has_findings = len(malwebRes) != 0

                # Verdict paragraph.
                if has_findings:
                    div_tc_str = u'\t\t<div class="tc">\n' \
                                 u'\t\t\t<h2>经检测:</h2>\n' \
                                 u'\t\t\t<p>发现该网站存在恶意网页(暗链|挂马|webshell),证明该网站已经遭到入侵。请尽快删除恶意网页或内容。</p>\n' \
                                 u'\t\t</div>\n'
                else:
                    div_tc_str = u'\t\t<div class="tc">\n' \
                                 u'\t\t\t<h2>经检测:</h2>\n' \
                                 u'\t\t\t<p>没有发现该网站存在恶意内容。</p>\n' \
                                 u'\t\t</div>\n'
                self._write_to_file(div_tc_str)

                div_end_str = u'\t</div>\n'
                self._write_to_file(div_end_str)

                # Target site information.
                div_body_str = u'\t<div class="body">\n' \
                               u'\t\t<!-- 重复性模块:开始 -->\n' \
                               u'\t\t<div class="m">\n' \
                               u'\t\t\t<div class="m-h">\n' \
                               u'\t\t\t\t<h2>恶意网页检测结果</h2>\n' \
                               u'\t\t\t</div>\n' \
                               u'\t\t\t<div class="m-b">\n'
                self._write_to_file(div_body_str)

                if has_findings:
                    hdurl_num = len(malwebRes)  # number of pages with hidden links
                    div_c_str = u'\t\t\t\t<div class="c">\n' \
                                u'\t\t\t\t\t<dl class="c-t">\n' \
                                u'\t\t\t\t\t\t<dt><strong>暗链(%d)</strong></dt>\n' \
                                u'\t\t\t\t\t\t<dd> <strong>描述:</strong><br/>\n' \
                                u'\t\t\t\t\t\t\t暗链是指攻击者通过各种攻击手段向网站的正常网页中植入视觉上令人难以察觉的链接,这些链接往往是网游私服、医疗、博彩、色情,甚至是反动网站的网站链接。<br/>\n' \
                                u'\t\t\t\t\t\t\t<strong>修复建议:</strong><br/>\n' \
                                u'\t\t\t\t\t\t\t删除暗链代码,同时修复网站漏洞防止再次被植入暗链。\n' \
                                u'\t\t\t\t\t\t</dd>\n' \
                                u'\t\t\t\t\t</dl>\n' \
                                u'\t\t\t\t\t<div class="c-l">\n' % hdurl_num
                    self._write_to_file(div_c_str)

                    # hd_url: page that contains hidden links.
                    # hd_set: the links found on it, {url: (content, level, type)}.
                    for k, (hd_url, hd_set) in enumerate(malwebRes.items(), 1):
                        link_items = []
                        for (include_url, include_property) in hd_set.items():
                            content = include_property[0]
                            level = include_property[1]
                            link_type = include_property[2]
                            link_items.append(
                                u'\t\t\t\t\t\t\t\t\t<li>链接:%s\t内容:%s\t等级:%s\t类型:%s</li>\n' % (
                                    _esc(include_url), _esc(content), _esc(level), _esc(link_type)))
                        links_html = u''.join(link_items)
                        page_url = _esc(hd_url)

                        div_c_item_str = u'\t\t\t\t\t\t<div class="c-item">\n' \
                                         u'\t\t\t\t\t\t\t<div class="c-h">\n' \
                                         u'\t\t\t\t\t\t\t\t<h3>恶意网页%d/%d</h3>\n' \
                                         u'\t\t\t\t\t\t\t</div>\n' \
                                         u'\t\t\t\t\t\t\t<div class="c-b">\n' \
                                         u'\t\t\t\t\t\t\t\t<dl>\n' \
                                         u'\t\t\t\t\t\t\t\t\t<dt>页面URL:</dt>\n' \
                                         u'\t\t\t\t\t\t\t\t\t<dd><a href="%s">发现"%s"存在暗链</a></dd>\n' \
                                         u'\t\t\t\t\t\t\t\t</dl>\n' \
                                         u'\t\t\t\t\t\t\t\t<dl>\n' \
                                         u'\t\t\t\t\t\t\t\t\t<dt>严重等级:</dt>\n' \
                                         u'\t\t\t\t\t\t\t\t\t<dd><strong class="high">%s</strong>\n' \
                                         u'\t\t\t\t\t\t\t\t</dl>\n' \
                                         u'\t\t\t\t\t\t\t\t<dl>\n' \
                                         u'\t\t\t\t\t\t\t\t\t<dt>恶意内容:</dt>\n' \
                                         u'\t\t\t\t\t\t\t\t\t<dd></dd>\n' \
                                         u'\t\t\t\t\t\t\t\t</dl>\n' \
                                         u'\t\t\t\t\t\t\t\t<code class="quote">\n' \
                                         u'%s' \
                                         u'\t\t\t\t\t\t\t\t</code>\n' \
                                         u'\t\t\t\t\t\t\t</div>\n' \
                                         u'\t\t\t\t\t\t</div>\n' % (k, hdurl_num, page_url, page_url, settings.get('THREAT_LEVEL'), links_html)
                        self._write_to_file(div_c_item_str)
                    div_end_str = u'\t\t\t\t\t</div>\n' \
                                  u'\t\t\t\t</div>\n' \
                                  u'\t\t\t</div>\n'

                    self._write_to_file(div_end_str)

                else:
                    div_c_str = u'\t\t\t\t未发现任何恶意网页!\n'
                    self._write_to_file(div_c_str)
                    div_end_str = u'\t\t\t</div>\n'
                    self._write_to_file(div_end_str)

                div_end_str = u'\t\t</div>\n' \
                              u'\t</div>\n'
                self._write_to_file(div_end_str)

                html_end_str = u'</div>\n</body>\n</html>\n'
                self._write_to_file(html_end_str)

            except Exception as e:
                logger.error('Why this happen, report will return none:%s' % e)
                self._file.close()
                return None
Esempio n. 13
0
            raise DarkException(msg)

        try:
            main_style_file = open(self._style_main_filename, "r")
        except:
            raise DarkException, _('Cant open style file ' +
                                   self._html_filepath + '.')
        else:
            doctype_str = u'<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"' \
                          u'"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">\n' \
                          u'<html xmlns="http://www.w3.org/1999/xhtml">\n'
            self._write_to_file(doctype_str)
            head_str = u'<head>\n' \
                       u'\t<title>%s</title>\n' \
                       u'\t<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">\n' \
                       u'\t<STYLE TYPE="text/css">\n' % unicode(settings.get('HTML_TITLE'))
            self._write_to_file(head_str)
            self._write_to_file(main_style_file.read())
            headend_str = u'\n\t</style>\n</head>\n'
            self._write_to_file(headend_str)
            main_style_file.close()
            body_str = u'<body>\n'
            self._write_to_file(body_str)

    def _write_to_file(self, msg):
        '''
        Write msg to the file.

        @parameter msg: The message string.
        '''
        try:
Esempio n. 14
0
class hiddenlink_obj():
    '''
    Crawl a site and run hidden-link detection on every URL that was found.

    init() crawls ``url`` with sinbot and fills ``urlList``; run() dispatches
    one oneTask() per URL through a ThreadManager, generates the HTML report
    and, when anything was found, stores a statistics record.
    '''

    def __init__(self, url):
        '''
        @parameter url: Address of the main page to be checked.
        '''
        spider_path = pf.getProfileValue('spider', 'path')
        spider_setting_path = pf.getProfileValue('spider_setting', 'path')
        # Make the sinbot and sinbot_settings modules importable. Each path
        # is added only once so repeated instantiation does not keep
        # growing sys.path.
        for extra_path in (spider_path, spider_setting_path):
            if extra_path not in sys.path:
                sys.path.append(extra_path)

        self.url = url                      # address of the main page being checked
        self.resultHiddenlink = {}          # final detection results, keyed by page URL
        self.urlList = []                   # URLs that have to be checked
        self.curNum = 0                     # number of tasks started so far
        self.detectTM = ThreadManager()     # thread management

    @staticmethod
    def _normalize_urls(requests):
        '''
        Extract the ``url`` of every crawler request, drop one trailing '/'
        and remove duplicates.

        @parameter requests: Iterable of objects with a ``url`` attribute.
        @return: Set of the normalised URLs.
        '''
        urls = set()
        for request in requests:
            url = request.url
            if url and url[-1] == '/':
                url = url[:-1]
            urls.add(url)
        return urls

    def init(self):
        '''
        Configure the thread manager, crawl ``self.url`` and store the
        de-duplicated result in ``self.urlList``.
        '''
        self.detectTM.setMaxThreads(10)     # number of tasks that may run at once

        from sinbot import sinbot_start     # crawler entry point
        from settings.settings import settings as st  # the crawler's own settings
        # Crawl depth; counting starts at 0, so a value of 2 means 3 levels.
        st.set('DEPTH_LIMIT', settings.getint('DEPTH_LIMIT'))
        reqList = sinbot_start(self.url)    # start crawling
        self.urlList = self._normalize_urls(reqList)
        logger.info('Detect modules complete initialization...')

    def oneTask(self, url):
        '''
        Run the detection for a single URL and record its hidden links.

        @parameter url: Page URL to check.
        '''
        # NOTE(review): run() starts this method through the ThreadManager and
        # curNum is updated without a lock; it is only used for the progress
        # log line below - confirm that is acceptable.
        self.curNum += 1
        logger.info('One detect task is running(%d/%d), detect url is : %s' % (self.curNum, len(self.urlList), url))
        starttime = time.time()
        hdDetect = Detect(url)
        hdDetect.init_detect()
        hdDetect.evil_detect()
        hdDetect.print_hiddenlink_result()
        if len(hdDetect.hiddenSet):
            self.resultHiddenlink[url] = hdDetect.hiddenSet
        endtime = time.time()
        logger.info('One detect task finished! Using %f seconds!' % (endtime - starttime))

    def run(self):
        '''
        Check every URL in ``self.urlList``, generate the HTML report and
        store a statistics record when hidden links were found.

        Sets ``self.strStartTime`` and ``self.interval``. URLs that are None
        or empty are logged and skipped instead of being dispatched.
        '''
        # 0. Record when the detection starts.
        startTime = time.time()
        self.strStartTime = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(startTime))

        # 1. Start one task per URL.
        for url in self.urlList:
            if url is not None:
                # A '\n' in the URL makes the browser request fail.
                url = url.strip('\n')
            if not url:
                logger.error('No url need to detect, please check it!')
                continue
            self.detectTM.startTask(self.oneTask, (url, ))

        self.detectTM.join()
        # 2. Record how long the detection took.
        endTime = time.time()
        self.interval = human_time(endTime - startTime)

        # 3. Generate the detection report.
        logger.info('Detect running success! Now will make the detect report file!')
        html_report = HtmlFile(self)
        report_path = None
        try:
            report_path = html_report.genHtmlReport()
        except Exception as msg:
            logger.error('Make detect report file failed! Exception: %s.' % msg)

        # 4. Write the detection result to the database.
        threat_name = settings.get('THREAT_NAME')
        threat_sum = len(self.resultHiddenlink)
        threat_level = settings.get('THREAT_LEVEL')

        if report_path is None:
            logger.error('HTML maker get wrong report path! Please check it!')
            report_part_path = None
        else:
            # Only report success when a report path really came back.
            logger.info('Store detect report success!')
            # Keep the last two path components; a path with a single
            # component is kept as it is instead of raising IndexError.
            report_part_path = '/'.join(report_path.split('/')[-2:])
        stat_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        if threat_sum != 0:
            record_id = PKgenerator.getPrimaryKeyId()
            try:
                ref_id = get_id_from_monitor_sites_by_url(self.url)
            except DarkException as msg:
                logger.error(msg)
            else:
                try:
                    store_url_hidden_report_in_monitor_statistic(record_id, ref_id, threat_name, threat_level, threat_sum, stat_time, report_part_path)
                except DarkException as msg:
                    logger.error(msg)
Esempio n. 15
0
 def __init__(self):
     '''Instantiate the Browser for the configured BROWSER_TYPE and log that it is ready.'''
     configured_type = settings.get('BROWSER_TYPE')
     self.browser = Browser(configured_type)
     logger.info('Splinter browser is init!')