def data_achieve(self, pages):
    """
    Extract event titles and links.
    Returns a list of {'link': ..., 'title': ...} dicts.
    :param pages: iterable of HTML page contents
    """
    print '360_dataAchieve'
    events = {}
    data = []
    for page in pages:
        try:
            soup = BeautifulSoup(page, "html5lib")
        except Exception as e:
            error_text = exception_format(e)
            self.send_text_email('Program Exception', error_text, 'ExceptionInfo')
        else:
            titles = soup.find_all(href=re.compile("/vul/info/qid"))
            # title2 = soup.find_all(href=re.compile("/company/info/id/"))
            # title3 = soup.find_all(href=re.compile("/vul/search/"))
            # titles = title1 + title2 + title3
            for title in titles:
                events['link'] = title['href']
                events['title'] = title.string.strip()
                data.append(events.copy())
    # database.remove_date(self.con)
    # database.insert_data(self.con, data)
    return data
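# A minimal, standalone sketch of the extraction pattern used above, run against an
# inline HTML snippet rather than live Butian pages (the snippet and the
# extract_events() helper are illustrative only, not part of the monitor class):
#
#     import re
#     from bs4 import BeautifulSoup
#
#     def extract_events(page):
#         soup = BeautifulSoup(page, "html5lib")
#         return [{'link': a['href'], 'title': a.string.strip()}
#                 for a in soup.find_all(href=re.compile("/vul/info/qid"))]
#
#     sample = '<ul><li><a href="/vul/info/qid/12345"> Some vendor SQL injection </a></li></ul>'
#     print extract_events(sample)
#     # -> [{'link': '/vul/info/qid/12345', 'title': 'Some vendor SQL injection'}]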
def domain_description_achieve(self, url):
    """
    Extract the description and vendor domain from a WooYun event page.
    :param url: WooYun vulnerability page
    :return: (domain, description); domain may be None, description may be ''
    """
    print 'WooYun_domain_description_achieve'
    page = self.request(url=url, header=self.headers).content
    try:
        soup = BeautifulSoup(page, "html5lib")
    except Exception as e:
        error_text = exception_format(e)
        print error_text
        self.send_text_email('Program Exception', error_text, 'ExceptionInfo')
        return None, None  # keep the (domain, description) contract even when parsing fails
    else:
        des = soup.find(class_="detail wybug_description").string  # extract the description
        url2 = soup.find('h3', class_="wybug_corp").a['href']
        if url2:
            page = self.request(url=url2, header=self.headers).content
            if page:
                raw_domain = etree.HTML(page)
                if u"厂商信息" in unicode(raw_domain.xpath('/html/head/title/text()')[0]):
                    domain = self.get_domain(''.join(
                        list(raw_domain.xpath('/html/body/div[5]/h3[1]/text()')[0])[3:]))
                    return domain, des  # domain, description
                else:
                    # if '厂商' in sign and '不存在' in sign and '未通过审核' in sign:
                    return None, des
            else:
                return None, des
        else:
            return None, des
def data_achieve(self, pages):
    """
    Extract event titles and links.
    Returns a list of {'link': ..., 'title': ...} dicts.
    :param pages: iterable of HTML page contents
    """
    print 'freebuf_dataAchieve'
    events = {}
    data = []
    for page in pages:
        try:
            soup = BeautifulSoup(page, "html5lib")
        except Exception as e:
            error_text = exception_format(e)
            self.send_text_email('Program Exception', error_text, 'ExceptionInfo')
        else:
            # titles = soup.find_all('a', href=re.compile("/bugs/vulbox"), target="_blank")
            titles = soup.find_all('h4', class_='tit')
            for title in titles:
                events['link'] = title.a['href']
                events['title'] = title.a.string.strip()
                data.append(events.copy())
    # database.remove_date(self.con)
    # database.insert_data(self.con, data)
    return data
def key_words_check(self, data):
    """
    Check whether the fetched titles contain any monitored keywords.
    Calls send_record() internally; returns nothing.
    :param data: list of event entries (title, link, id)
    """
    print 'WooYun_key_words_check'
    md5value = self.file_md5_get
    if md5value != self.fileMd5:
        self.key_words_list = self.key_words_read
        self.fileMd5 = md5value
    try:
        for detail in data:
            title = detail.get('title').lower()
            print title
            for key1, values in self.key_words_list.iteritems():
                if key1 in title:
                    if values:
                        dom, des = self.domain_description_achieve(detail.get('link'))
                        for value in values:
                            if value.get('KEY2'):
                                if value.get('KEY2') in title:
                                    # 1. Check whether the secondary keyword appears in the title
                                    self.send_record(detail.get('title').strip(),
                                                     detail.get('link'),
                                                     detail.get('id'),
                                                     value.get('TAG'))
                                elif des and (value.get('KEY2') in des):
                                    # 2. Check whether the secondary keyword appears in the event description
                                    self.send_record(detail.get('title').strip(),
                                                     detail.get('link'),
                                                     detail.get('id'),
                                                     value.get('TAG'))
                                elif value.get('URL') and dom and (value.get('URL') in dom):
                                    # If the secondary keyword misses, fall back to the domain
                                    # 3. Check the vendor domain from the event page
                                    self.send_record(detail.get('title').strip(),
                                                     detail.get('link'),
                                                     detail.get('id'),
                                                     value.get('TAG'))
                                # elif value.get('KEY2') is None and value.get('URL') is not None:
                                #     # Would reduce misses caused by domains, but also add false positives
                                #     self.send_record(detail.get('title').strip(),
                                #                      detail.get('link'),
                                #                      detail.get('id'))
                            else:
                                # With no secondary keyword and no domain configured, the primary match counts as a hit.
                                self.send_record(detail.get('title').strip(),
                                                 detail.get('link'),
                                                 detail.get('id'),
                                                 value.get('TAG'))
                    else:
                        pass
                else:
                    print "Monitored keyword", key1, "not in event title, checking next keyword"
    except Exception as e:
        error_text = exception_format(e)
        print error_text
        self.send_text_email('Program Exception', error_text, 'ExceptionInfo')
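# The branches above imply a keyword configuration shaped like the sketch below: a primary
# keyword maps to a list of optional refinements, each carrying a secondary keyword (KEY2),
# a vendor domain (URL) and a tag (TAG). The concrete values are made up for illustration;
# the real mapping is loaded from the keyword file via self.key_words_read:
#
#     key_words_list = {
#         'sql injection': [
#             {'KEY2': 'payment', 'URL': 'example.com', 'TAG': 'payments-team'},
#             {'KEY2': None, 'URL': None, 'TAG': 'default'},
#         ],
#     }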
def key_words_check(self, data):
    """
    Check whether the fetched titles contain any monitored keywords.
    Calls send_record() internally; returns nothing.
    :param data:
    """
    print '360_key_words_check'
    md5value = self.file_md5_get
    if md5value != self.fileMd5:
        self.key_words_list = self.key_words_read
        self.fileMd5 = md5value
    try:
        for detail in data:
            title = detail.get('title').lower()
            print title
            for key1, values in self.key_words_list.iteritems():
                if key1 in title:
                    if values:
                        dom, des = self.domain_description_achieve(
                            self._360base_url + detail.get('link'))
                        for value in values:
                            if value.get('KEY2'):
                                # 1. Check whether the secondary keyword appears in the title
                                if value.get('KEY2') in title:
                                    # print '1.', _360title
                                    self.send_record(detail.get('title'),
                                                     self._360base_url + detail.get('link'),
                                                     detail.get('link').split('/')[-1],
                                                     value.get('TAG'))
                                # 2. Check whether the secondary keyword appears in the description
                                elif des and (value.get('KEY2') in des):
                                    self.send_record(detail.get('title'),
                                                     self._360base_url + detail.get('link'),
                                                     detail.get('link').split('/')[-1],
                                                     value.get('TAG'))
                                # 3. Fall back to the vendor domain check
                                elif value.get('URL') and dom and (value.get('URL') in dom):
                                    # print '3.', _360title
                                    self.send_record(detail.get('title'),
                                                     self._360base_url + detail.get('link'),
                                                     detail.get('link').split('/')[-1],
                                                     value.get('TAG'))
                    else:
                        pass
                else:
                    print "Monitored keyword", key1, "not in event title, checking next keyword"
    except Exception as e:
        error_text = exception_format(e)
        self.send_text_email('Program Exception', error_text, 'ExceptionInfo')
def key_words_check(self, data):
    """
    Check whether the fetched titles contain any monitored keywords.
    Calls send_record() internally; returns nothing.
    :param data:
    """
    print 'freebuf_keyWords_check'
    md5value = self.file_md5_get
    if md5value != self.fileMd5:
        self.key_words_list = self.key_words_read
        self.fileMd5 = md5value
    try:
        for detail in data:
            title = detail.get('title').lower()
            print title
            for key1, values in self.key_words_list.iteritems():
                if key1 in title:
                    if values:
                        for value in values:
                            if value.get('KEY2') is not None:
                                if value.get('KEY2') in title:
                                    self.send_record(detail.get('title'),
                                                     self.freebuf_base_url + detail.get('link'),
                                                     detail.get('link').split('/')[-1],
                                                     value.get('TAG'))
                                # else:
                                #     # If the secondary keyword misses, also check the domain and page content:
                                #     # 2. check the vendor domain on the event page
                                #     # 3. search the page body for the secondary keyword
                                #     # The page layout is irregular, which makes this awkward
                            elif value.get('KEY2') is None:
                                self.send_record(detail.get('title'),
                                                 self.freebuf_base_url + detail.get('link'),
                                                 detail.get('link').split('/')[-1],
                                                 value.get('TAG'))
                    else:
                        pass
                else:
                    print "Monitored keyword", key1, "not in event title, checking next keyword"
    except Exception as e:
        error_text = exception_format(e)
        self.send_text_email('Program Exception', error_text, 'ExceptionInfo')
def data_achieve(self, text):
    """
    Extract the JSON payload from the response.
    :param text: response object returned by the data-request step
    """
    print 'WooYun_dataAchieve'
    while 1:
        try:
            data = text.json()
        except Exception as e:
            error_text = exception_format(e)
            self.send_text_email('Program Exception', error_text, 'ExceptionInfo')
            text = self.page_request()
        else:
            # database.remove_date(self.con)
            # database.insert_data(self.con, data)
            return data
def domain_description_achieve(self, url):
    """
    Extract the description and vendor domain from a 360 Butian event page.
    :param url: 360 Butian vulnerability event page
    :return: (domain, description); domain may be None, description may be ''
    """
    print '360_domain_description_achieve'
    page = self.request(url=url, header=self.headers)
    if page:
        page = page.content
        try:
            soup = BeautifulSoup(page, "html5lib")
            des = soup.find(id="ld_td_description").string  # extract the description
            dom_exi = soup.find(class_="ld-vul-v1-tips").string
            if u'已注册' in dom_exi:
                url2 = soup.find(href=re.compile(u'/vul/search/c/'))['href']
                dom_page = self.request(self._360base_url + url2, self.headers)
                print "dom_page", dom_page
                if dom_page:
                    print 'url:', dom_page.url
                    raw_dom = BeautifulSoup(dom_page.content, "html5lib")
                    tmp = raw_dom.find('div', class_='company_info')  # 360 has anti-bot measures
                    if tmp:
                        domain = tmp.table.tbody.tr.td.next_sibling.next_sibling.string
                        print 'domain:', domain
                        return domain, des
                    else:
                        return None, des
                else:
                    return None, des
            else:
                return None, des
        except Exception as e:
            error_text = exception_format(e)
            print error_text
            self.send_text_email('Program Exception', error_text, 'ExceptionInfo')
    return None, None  # also reached when the initial request failed
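# A standalone sketch of the table navigation used above, run against an inline snippet
# shaped like the assumed company_info markup (illustrative only; the live page may differ):
#
#     from bs4 import BeautifulSoup
#
#     html = ('<div class="company_info"><table><tbody><tr>'
#             '<td>Domain</td><td>example.com</td>'
#             '</tr></tbody></table></div>')
#     tmp = BeautifulSoup(html, "html5lib").find('div', class_='company_info')
#     # With real markup the cells are separated by whitespace text nodes, which is why the
#     # code above needs next_sibling.next_sibling; in this compact snippet one step is enough.
#     print tmp.table.tbody.tr.td.next_sibling.string  # -> example.com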
def api_request(self, url=None, header=None):
    """
    Fetch JSON-format data from the WooYun API and return the response.
    :param header: request headers
    :param url: target URL; defaults to self.wooyun_url
    """
    print 'WooYun_dataRequest'
    url = url or self.wooyun_url
    while True:
        try:
            if self.count > 10:
                self.send_text_email('Important Program Exception',
                                     'Target url can not reach',
                                     'ExceptionInfo')
                self.count = 0
                return None
            page = requests.get(url, timeout=30, headers=header)
        except socket.timeout:
            time.sleep(30)
            self.count += 1
            continue
        except requests.exceptions.ReadTimeout:
            time.sleep(10)
            self.count += 1
            continue
        except requests.exceptions.ConnectTimeout:
            time.sleep(60)
            self.count += 1
            continue
        except requests.exceptions.Timeout:
            time.sleep(30)
            self.count += 1
            continue
        except requests.exceptions.ConnectionError:
            # When debugging, failure to connect usually ends up in this branch
            time.sleep(30)
            self.count += 1
            continue
        except requests.exceptions.HTTPError as e:
            error_text = exception_format(e)
            self.send_text_email('Important Program Exception', error_text, 'ExceptionInfo')
            time.sleep(600)
            self.count += 1
            continue
        except Exception as e:
            error_text = exception_format(e)
            self.send_text_email('Program Exception', error_text, 'ExceptionInfo')
            self.count += 1
            continue
        else:
            if page.status_code == 200:
                self.count = 0
                return page
            elif page.status_code == 522:
                continue
            elif page.status_code == 504:
                time.sleep(30)
                self.count += 1
                continue
            elif page.status_code == 503:
                if page.headers.get('Retry-After'):
                    time.sleep(int(page.headers.get('Retry-After')))
                else:
                    pass
                self.count += 1
                continue
            else:
                error_text = "Page Code %s " % page.status_code
                self.send_text_email('Page Error', error_text, 'ExceptionInfo')
                self.count += 1
                continue
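# A rough sketch of how the WooYun methods in this file are expected to chain together
# (the WooYunMonitor class name and the poll_interval attribute are assumptions, not
# something defined here):
#
#     monitor = WooYunMonitor()
#     while True:
#         response = monitor.api_request()           # fetch the API response, with retries
#         if response is not None:
#             data = monitor.data_achieve(response)  # decode the JSON payload
#             monitor.key_words_check(data)          # alert on monitored keywords
#         time.sleep(monitor.poll_interval)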