def run(self):
    """Run detection on every URL in ``self.urlList`` and generate the HTML report.

    Records the start time in ``self.strStartTime``, starts one task per URL
    through ``self.detectTM``, waits for all tasks, stores the elapsed time
    (formatted by ``human_time``) in ``self.interval`` and finally asks
    ``HtmlFile`` to build the report. A failure while building the report is
    logged, not raised.
    """
    # 0. Record when detection started.
    startTime = time.time()
    self.strStartTime = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(startTime))

    for raw_url in self.urlList:
        # Test for None *before* calling a string method on the entry;
        # otherwise the error branch can never be reached.
        if raw_url is None:
            logger.error('No url need to detect, please check it!')
            continue
        # A trailing "\n" makes the browser request fail, so strip it.
        url = raw_url.strip('\n')
        self.detectTM.startTask(self.oneTask, (url,))
    self.detectTM.join()

    # 2. Record how long detection took.
    endTime = time.time()
    self.interval = human_time(endTime - startTime)

    # 3. Generate the detection report.
    logger.info('Detect running success! Now will make the detect report file!')
    html_report = HtmlFile(self)
    report_path = None
    try:
        report_path = html_report.genHtmlReport()
    except Exception as msg:  # "as" form works on Python 2.6+ and Python 3
        logger.error('Make detect report file failed! Exception: %s.' % msg)
def run(self):
    """Fetch ``self.url`` and store the response body in ``self.html``.

    The request is repeated while the response status code is listed in
    ``self.retry_http_code`` or a connection/timeout error occurs, for at
    most ``self.retry_times`` attempts. Any other exception is logged and
    stops the retry loop. Nothing is returned; when ``self.retry_times`` is
    zero no request is made and ``self.html`` is left untouched.
    """
    retry_flag = True
    retry_times = 0
    while retry_flag and retry_times < self.retry_times:
        try:
            req = requests.get(self.url, headers=self.header, timeout=5)
            # gb2312 pages are stored as decoded text, everything else as
            # the raw response body.
            if req.encoding == 'gb2312':
                self.html = req.text
            else:
                self.html = req.content
            if req.status_code not in self.retry_http_code:
                retry_flag = False
            else:
                retry_times += 1
        except (ConnectionError, Timeout, timeout):
            # Transient network failures: count the attempt and try again.
            # NOTE(review): ``timeout`` is presumably socket.timeout - confirm
            # against the file's imports.
            retry_times += 1
        except Exception as e:  # "as" form works on Python 2.6+ and Python 3
            logger.error('Request run exception an Exception, and type is %s' % type(e))
            retry_flag = False
def __init__(self, url):
    """Remember ``url`` and look up its root domain.

    :param url: address of the page this parser works on.

    ``self.doc`` and ``self.html`` start as ``None``. ``self.root`` is taken
    from ``url_object(url).getRootDomain``; if that lookup fails the error is
    logged and ``self.root`` stays ``None``.
    """
    self.url = url
    self.doc = None
    self.html = None
    # Define the attribute up front so it exists even when the lookup below
    # fails; otherwise later reads of self.root raise AttributeError.
    self.root = None
    try:
        # NOTE(review): getRootDomain is read without being called -
        # confirm it is a property and not a method.
        self.root = url_object(url).getRootDomain
    except Exception as msg:  # "as" form works on Python 2.6+ and Python 3
        logger.error('Html parser initialization filed, please check it! Exception: %s' % msg)
def get_style_in_class(self, className):
    """Return the style declarations attached to a CSS class.

    Searches each entry of ``self.styles`` (case-insensitively) for
    ``className`` followed by a space and a ``{...}`` block.

    :param className: class name; it is inserted into the regular expression
        as-is, so regex metacharacters in it keep their special meaning.
    :return: the text between the braces of the first match, or ``None`` when
        no stylesheet matches or the pattern cannot be built.
    """
    try:
        # Compile once: the pattern does not depend on the stylesheet.
        pattern = re.compile(r'%s\x20[^\{]*\{([^\}]+)\}' % className,
                             re.IGNORECASE | re.DOTALL)
    except Exception as msg:
        logger.error(msg)
        return None

    for style in self.styles:
        try:
            match = pattern.search(style)
        except Exception as msg:
            # e.g. a non-string stylesheet entry; log it and keep looking.
            logger.error(msg)
            continue
        if match is not None:
            return match.group(1)
    return None
def get_id_in_javascript(self, functionName):
    """Return the element IDs a JavaScript function looks up.

    Each entry of ``self.javaScripts`` is searched for a definition of
    ``functionName()`` whose body calls ``document.getElementById``; every ID
    passed to ``getElementById`` inside that body is collected.

    :param functionName: name of the JavaScript function; it is inserted into
        the regular expression as-is.
    :return: list of ID strings from the last script that defines the
        function, or an empty list when none does.
    """
    matchList = []
    searchRegex = r'document.getElementById\([\'\"](\w+)[\'\"]\)'
    flags = re.IGNORECASE | re.DOTALL
    for js in self.javaScripts:
        try:
            blockRegex = r'function\s+%s\(\).+?document.getElementById\([\'\"](\w+)[\'\"]\).+?\}' % functionName
            block = re.search(blockRegex, js, flags)
            if block is not None:
                # findall always returns a list, so no None check is needed.
                matchList = re.findall(searchRegex, block.group(0), flags)
        except Exception as msg:  # "as" form works on Python 2.6+ and Python 3
            logger.error(msg)
    # The collected IDs used to be discarded; hand them back to the caller.
    return matchList
def genHtmlReport(self):
    """Write the body of the malicious-content scan report.

    This method is called when the scan has finished. It reads the detection
    result (``self.obj.resultHiddenlink``), start time and duration from
    ``self.obj`` and emits the report HTML piece by piece through
    ``self._write_to_file``. Values that come from scanned pages (target,
    page URLs, link URLs, link text, level, type) are HTML-escaped so that
    content from a compromised site cannot inject markup into the report.

    :return: ``None``. If anything fails the error is logged and
        ``self._file`` is closed.
    """
    # NOTE(review): the success path visible here neither closes self._file
    # nor returns a report path - confirm what callers expect.
    try:
        from html import escape  # Python 3
    except ImportError:
        from cgi import escape  # Python 2

    def esc(value):
        # Coerce to text first so non-string values (e.g. numeric levels)
        # can be escaped; True also escapes quotes for attribute values.
        return escape(u'%s' % (value,), True)

    # Just in case...
    if not self._initialized:
        self._init()

    try:
        malwebRes = self.obj.resultHiddenlink
        starttime = self.obj.strStartTime
        interval = self.obj.interval

        rname = u'恶意内容扫描报告'

        self._write_to_file(
            u'<div id="wrapper">\n'
            u'\t<div class="tt">\n'
            u'\t\t<p>深信服安全中心云扫描服务</p></div>\n'
        )

        self._write_to_file(
            u'\t<div id="header">\n'
            u'\t\t<h1>WEB应用%s</h1>\n'
            u'\t\t<ul>\n'
            u'\t\t\t<li>目标网站:%s</li>\n'
            u'\t\t\t<li>开始时间:%s</li>\n'
            u'\t\t\t<li>扫描时长:%s</li>\n'
            u'\t\t</ul>\n' % (rname, esc(self.target), starttime, interval)
        )

        has_findings = len(malwebRes) != 0

        # Summary banner: compromised or clean.
        if has_findings:
            self._write_to_file(
                u'\t\t<div class="tc">\n'
                u'\t\t\t<h2>经检测:</h2>\n'
                u'\t\t\t<p>发现该网站存在恶意网页(暗链|挂马|webshell),证明该网站已经遭到入侵。请尽快删除恶意网页或内容。</p>\n'
                u'\t\t</div>\n'
            )
        else:
            self._write_to_file(
                u'\t\t<div class="tc">\n'
                u'\t\t\t<h2>经检测:</h2>\n'
                u'\t\t\t<p>没有发现该网站存在恶意内容。</p>\n'
                u'\t\t</div>\n'
            )
        self._write_to_file(u'\t</div>\n')

        # Target-site section: opens the repeated result module.
        self._write_to_file(
            u'\t<div class="body">\n'
            u'\t\t<!-- 重复性模块:开始 -->\n'
            u'\t\t<div class="m">\n'
            u'\t\t\t<div class="m-h">\n'
            u'\t\t\t\t<h2>恶意网页检测结果</h2>\n'
            u'\t\t\t</div>\n'
            u'\t\t\t<div class="m-b">\n'
        )

        if has_findings:
            hdurl_num = len(malwebRes)  # number of pages with hidden links
            self._write_to_file(
                u'\t\t\t\t<div class="c">\n'
                u'\t\t\t\t\t<dl class="c-t">\n'
                u'\t\t\t\t\t\t<dt><strong>暗链(%d)</strong></dt>\n'
                u'\t\t\t\t\t\t<dd> <strong>描述:</strong><br/>\n'
                u'\t\t\t\t\t\t\t暗链是指攻击者通过各种攻击手段向网站的正常网页中植入视觉上令人难以察觉的链接,这些链接往往是网游私服、医疗、博彩、色情,甚至是反动网站的网站链接。<br/>\n'
                u'\t\t\t\t\t\t\t<strong>修复建议:</strong><br/>\n'
                u'\t\t\t\t\t\t\t删除暗链代码,同时修复网站漏洞防止再次被植入暗链。\n'
                u'\t\t\t\t\t\t</dd>\n'
                u'\t\t\t\t\t</dl>\n'
                u'\t\t\t\t\t<div class="c-l">\n' % hdurl_num
            )

            k = 0
            # hd_url: page that contains hidden links;
            # hd_set: its hidden links as {url: (content, level, type)}
            for (hd_url, hd_set) in malwebRes.items():
                k = k + 1
                links_html = u''
                for (include_url, include_property) in hd_set.items():
                    content = include_property[0]
                    level = include_property[1]
                    link_type = include_property[2]  # avoid shadowing builtin type()
                    links_html += u'\t\t\t\t\t\t\t\t\t<li>链接:%s\t内容:%s\t等级:%s\t类型:%s</li>\n' % (
                        esc(include_url), esc(content), esc(level), esc(link_type))

                safe_url = esc(hd_url)
                self._write_to_file(
                    u'\t\t\t\t\t\t<div class="c-item">\n'
                    u'\t\t\t\t\t\t\t<div class="c-h">\n'
                    u'\t\t\t\t\t\t\t\t<h3>恶意网页%d/%d</h3>\n'
                    u'\t\t\t\t\t\t\t</div>\n'
                    u'\t\t\t\t\t\t\t<div class="c-b">\n'
                    u'\t\t\t\t\t\t\t\t<dl>\n'
                    u'\t\t\t\t\t\t\t\t\t<dt>页面URL:</dt>\n'
                    u'\t\t\t\t\t\t\t\t\t<dd><a href="%s">发现"%s"存在暗链</a></dd>\n'
                    u'\t\t\t\t\t\t\t\t</dl>\n'
                    u'\t\t\t\t\t\t\t\t<dl>\n'
                    u'\t\t\t\t\t\t\t\t\t<dt>严重等级:</dt>\n'
                    u'\t\t\t\t\t\t\t\t\t<dd><strong class="high">%s</strong>\n'
                    u'\t\t\t\t\t\t\t\t</dl>\n'
                    u'\t\t\t\t\t\t\t\t<dl>\n'
                    u'\t\t\t\t\t\t\t\t\t<dt>恶意内容:</dt>\n'
                    u'\t\t\t\t\t\t\t\t\t<dd></dd>\n'
                    u'\t\t\t\t\t\t\t\t</dl>\n'
                    u'\t\t\t\t\t\t\t\t<code class="quote">\n'
                    u'%s'
                    u'\t\t\t\t\t\t\t\t</code>\n'
                    u'\t\t\t\t\t\t\t</div>\n'
                    u'\t\t\t\t\t\t</div>\n' % (
                        k, hdurl_num, safe_url, safe_url,
                        settings.get('THREAT_LEVEL'), links_html)
                )

            # Close div.c-l, div.c and div.m-b.
            self._write_to_file(
                u'\t\t\t\t\t</div>\n'
                u'\t\t\t\t</div>\n'
                u'\t\t\t</div>\n'
            )
        else:
            self._write_to_file(u'\t\t\t\t未发现任何恶意网页!\n')
            self._write_to_file(u'\t\t\t</div>\n')

        # Close div.m and div.body, then the wrapper and the document.
        self._write_to_file(
            u'\t\t</div>\n'
            u'\t</div>\n'
        )
        self._write_to_file(u'</div>\n</body>\n</html>\n')

    except Exception as e:  # "as" form works on Python 2.6+ and Python 3
        logger.error("Why this happen, report will return none:%s" % e)
        self._file.close()
        return None
def genHtmlReport(self):
    """Write the body of the malicious-content scan report.

    This method is called when the scan has finished. It reads the detection
    result (``self.obj.resultHiddenlink``), start time and duration from
    ``self.obj`` and emits the report HTML piece by piece through
    ``self._write_to_file``. Values that come from scanned pages (target,
    page URLs, link URLs, link text, level, type) are HTML-escaped so that
    content from a compromised site cannot inject markup into the report.

    :return: ``None``. If anything fails the error is logged and
        ``self._file`` is closed.
    """
    # NOTE(review): the success path visible here neither closes self._file
    # nor returns a report path - confirm what callers expect.
    try:
        from html import escape  # Python 3
    except ImportError:
        from cgi import escape  # Python 2

    def esc(value):
        # Coerce to text first so non-string values (e.g. numeric levels)
        # can be escaped; True also escapes quotes for attribute values.
        return escape(u'%s' % (value,), True)

    # Just in case...
    if not self._initialized:
        self._init()

    try:
        malwebRes = self.obj.resultHiddenlink
        starttime = self.obj.strStartTime
        interval = self.obj.interval

        rname = u'恶意内容扫描报告'

        self._write_to_file(
            u'<div id="wrapper">\n'
            u'\t<div class="tt">\n'
            u'\t\t<p>深信服安全中心云扫描服务</p></div>\n'
        )

        self._write_to_file(
            u'\t<div id="header">\n'
            u'\t\t<h1>WEB应用%s</h1>\n'
            u'\t\t<ul>\n'
            u'\t\t\t<li>目标网站:%s</li>\n'
            u'\t\t\t<li>开始时间:%s</li>\n'
            u'\t\t\t<li>扫描时长:%s</li>\n'
            u'\t\t</ul>\n' % (rname, esc(self.target), starttime, interval)
        )

        has_findings = len(malwebRes) != 0

        # Summary banner: compromised or clean.
        if has_findings:
            self._write_to_file(
                u'\t\t<div class="tc">\n'
                u'\t\t\t<h2>经检测:</h2>\n'
                u'\t\t\t<p>发现该网站存在恶意网页(暗链|挂马|webshell),证明该网站已经遭到入侵。请尽快删除恶意网页或内容。</p>\n'
                u'\t\t</div>\n'
            )
        else:
            self._write_to_file(
                u'\t\t<div class="tc">\n'
                u'\t\t\t<h2>经检测:</h2>\n'
                u'\t\t\t<p>没有发现该网站存在恶意内容。</p>\n'
                u'\t\t</div>\n'
            )
        self._write_to_file(u'\t</div>\n')

        # Target-site section: opens the repeated result module.
        self._write_to_file(
            u'\t<div class="body">\n'
            u'\t\t<!-- 重复性模块:开始 -->\n'
            u'\t\t<div class="m">\n'
            u'\t\t\t<div class="m-h">\n'
            u'\t\t\t\t<h2>恶意网页检测结果</h2>\n'
            u'\t\t\t</div>\n'
            u'\t\t\t<div class="m-b">\n'
        )

        if has_findings:
            hdurl_num = len(malwebRes)  # number of pages with hidden links
            self._write_to_file(
                u'\t\t\t\t<div class="c">\n'
                u'\t\t\t\t\t<dl class="c-t">\n'
                u'\t\t\t\t\t\t<dt><strong>暗链(%d)</strong></dt>\n'
                u'\t\t\t\t\t\t<dd> <strong>描述:</strong><br/>\n'
                u'\t\t\t\t\t\t\t暗链是指攻击者通过各种攻击手段向网站的正常网页中植入视觉上令人难以察觉的链接,这些链接往往是网游私服、医疗、博彩、色情,甚至是反动网站的网站链接。<br/>\n'
                u'\t\t\t\t\t\t\t<strong>修复建议:</strong><br/>\n'
                u'\t\t\t\t\t\t\t删除暗链代码,同时修复网站漏洞防止再次被植入暗链。\n'
                u'\t\t\t\t\t\t</dd>\n'
                u'\t\t\t\t\t</dl>\n'
                u'\t\t\t\t\t<div class="c-l">\n' % hdurl_num
            )

            k = 0
            # hd_url: page that contains hidden links;
            # hd_set: its hidden links as {url: (content, level, type)}
            for (hd_url, hd_set) in malwebRes.items():
                k = k + 1
                links_html = u''
                for (include_url, include_property) in hd_set.items():
                    content = include_property[0]
                    level = include_property[1]
                    link_type = include_property[2]  # avoid shadowing builtin type()
                    links_html += u'\t\t\t\t\t\t\t\t\t<li>链接:%s\t内容:%s\t等级:%s\t类型:%s</li>\n' % (
                        esc(include_url), esc(content), esc(level), esc(link_type))

                safe_url = esc(hd_url)
                self._write_to_file(
                    u'\t\t\t\t\t\t<div class="c-item">\n'
                    u'\t\t\t\t\t\t\t<div class="c-h">\n'
                    u'\t\t\t\t\t\t\t\t<h3>恶意网页%d/%d</h3>\n'
                    u'\t\t\t\t\t\t\t</div>\n'
                    u'\t\t\t\t\t\t\t<div class="c-b">\n'
                    u'\t\t\t\t\t\t\t\t<dl>\n'
                    u'\t\t\t\t\t\t\t\t\t<dt>页面URL:</dt>\n'
                    u'\t\t\t\t\t\t\t\t\t<dd><a href="%s">发现"%s"存在暗链</a></dd>\n'
                    u'\t\t\t\t\t\t\t\t</dl>\n'
                    u'\t\t\t\t\t\t\t\t<dl>\n'
                    u'\t\t\t\t\t\t\t\t\t<dt>严重等级:</dt>\n'
                    u'\t\t\t\t\t\t\t\t\t<dd><strong class="high">%s</strong>\n'
                    u'\t\t\t\t\t\t\t\t</dl>\n'
                    u'\t\t\t\t\t\t\t\t<dl>\n'
                    u'\t\t\t\t\t\t\t\t\t<dt>恶意内容:</dt>\n'
                    u'\t\t\t\t\t\t\t\t\t<dd></dd>\n'
                    u'\t\t\t\t\t\t\t\t</dl>\n'
                    u'\t\t\t\t\t\t\t\t<code class="quote">\n'
                    u'%s'
                    u'\t\t\t\t\t\t\t\t</code>\n'
                    u'\t\t\t\t\t\t\t</div>\n'
                    u'\t\t\t\t\t\t</div>\n' % (
                        k, hdurl_num, safe_url, safe_url,
                        settings.get('THREAT_LEVEL'), links_html)
                )

            # Close div.c-l, div.c and div.m-b.
            self._write_to_file(
                u'\t\t\t\t\t</div>\n'
                u'\t\t\t\t</div>\n'
                u'\t\t\t</div>\n'
            )
        else:
            self._write_to_file(u'\t\t\t\t未发现任何恶意网页!\n')
            self._write_to_file(u'\t\t\t</div>\n')

        # Close div.m and div.body, then the wrapper and the document.
        self._write_to_file(
            u'\t\t</div>\n'
            u'\t</div>\n'
        )
        self._write_to_file(u'</div>\n</body>\n</html>\n')

    except Exception as e:  # "as" form works on Python 2.6+ and Python 3
        logger.error('Why this happen, report will return none:%s' % e)
        self._file.close()
        return None
class hiddenlink_obj():
    """Crawl a site, run hidden-link detection on each crawled URL and report it.

    Typical use, as far as this class shows: construct with the site URL,
    call ``init()`` to crawl and collect the URL list, then ``run()`` to
    detect, build the HTML report and store the statistics.
    """

    def __init__(self, url):
        """Set up state for detecting hidden links under ``url``.

        :param url: address of the main page being checked.
        """
        spider_path = pf.getProfileValue('spider', 'path')
        spider_setting_path = pf.getProfileValue('spider_setting', 'path')
        # Make the sinbot and sinbot_settings modules importable. Skip paths
        # that are already registered so sys.path does not grow with every
        # instance.
        for extra_path in (spider_path, spider_setting_path):
            if extra_path not in sys.path:
                sys.path.append(extra_path)
        self.url = url                   # address of the main page being checked
        self.resultHiddenlink = {}       # final result: {page url: hidden-link set}
        self.urlList = []                # URLs that still need to be checked
        self.curNum = 0                  # how many tasks have been started so far
        self.detectTM = ThreadManager()  # thread management

    @staticmethod
    def _normalize_urls(request_list):
        """Return the de-duplicated URLs of the crawler's request objects.

        One trailing ``/`` is removed from each URL so that ``http://a/`` and
        ``http://a`` count as the same address. Empty or ``None`` URLs are
        kept unchanged.

        :param request_list: iterable of objects exposing a ``url`` attribute.
        :return: set of URL values.
        """
        urls = set()
        for item in request_list:
            url = item.url
            if url and url[-1] == '/':
                url = url[:-1]
            urls.add(url)
        return urls

    def init(self):
        """Crawl ``self.url`` and fill ``self.urlList`` with the URLs found."""
        self.detectTM.setMaxThreads(10)  # number of tasks allowed to run at once
        # Imported here because __init__ has just added their locations to
        # sys.path.
        from sinbot import sinbot_start
        from settings.settings import settings as st
        # Crawl depth; a value of 2 means 3 levels because counting starts at 0.
        st.set('DEPTH_LIMIT', settings.getint('DEPTH_LIMIT'))
        reqList = sinbot_start(self.url)                 # run the crawler
        self.urlList = self._normalize_urls(reqList)     # keep the crawled URLs
        logger.info('Detect modules complete initialization...')

    def oneTask(self, url):
        """Run hidden-link detection on one URL and record any findings.

        :param url: page address to check.
        """
        # NOTE(review): tasks are started through the thread manager, so this
        # unsynchronised counter may miscount; it is only used for logging.
        self.curNum += 1
        logger.info('One detect task is running(%d/%d), detect url is : %s' % (self.curNum, len(self.urlList), url))
        starttime = time.time()
        hdDetect = Detect(url)
        hdDetect.init_detect()
        hdDetect.evil_detect()
        hdDetect.print_hiddenlink_result()
        if len(hdDetect.hiddenSet):
            self.resultHiddenlink[url] = hdDetect.hiddenSet
        endtime = time.time()
        logger.info('One detect task finished! Using %f seconds!' % (endtime - starttime))

    def run(self):
        """Detect every URL in ``self.urlList``, build the report, store statistics.

        Steps: record the start time, start one task per URL and wait for
        them, record the duration, generate the HTML report and, when hidden
        links were found, write a statistics row to the database. Report and
        database failures are logged, not raised.
        """
        # 0. Record when detection started.
        startTime = time.time()
        self.strStartTime = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(startTime))

        for raw_url in self.urlList:
            # Test for None *before* calling a string method on the entry;
            # otherwise the error branch can never be reached.
            if raw_url is None:
                logger.error('No url need to detect, please check it!')
                continue
            # A trailing "\n" makes the browser request fail, so strip it.
            url = raw_url.strip('\n')
            self.detectTM.startTask(self.oneTask, (url,))
        self.detectTM.join()

        # 2. Record how long detection took.
        endTime = time.time()
        self.interval = human_time(endTime - startTime)

        # 3. Generate the detection report.
        logger.info('Detect running success! Now will make the detect report file!')
        html_report = HtmlFile(self)
        report_path = None
        try:
            report_path = html_report.genHtmlReport()
        except Exception as msg:  # "as" form works on Python 2.6+ and Python 3
            logger.error('Make detect report file failed! Exception: %s.' % msg)

        # 4. Write the detection result to the database.
        threat_name = settings.get('THREAT_NAME')
        threat_sum = len(self.resultHiddenlink)
        threat_level = settings.get('THREAT_LEVEL')
        if report_path is None:
            logger.error('HTML maker get wrong report path! Please check it!')
            report_part_path = None
        else:
            # Only claim success when a report path was actually produced.
            logger.info('Store detect report success!')
            # Keep the last two path components; slicing (instead of indexing
            # [-2]) also copes with a path that contains no "/".
            report_part_path = '/'.join(report_path.split('/')[-2:])

        stat_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        if threat_sum != 0:
            record_id = PKgenerator.getPrimaryKeyId()
            try:
                ref_id = get_id_from_monitor_sites_by_url(self.url)
            except DarkException as msg:
                logger.error(msg)
            else:
                try:
                    store_url_hidden_report_in_monitor_statistic(
                        record_id, ref_id, threat_name, threat_level,
                        threat_sum, stat_time, report_part_path)
                except DarkException as msg:
                    logger.error(msg)