def parse(self, response):
    """Prepare the tables, then follow every "next page" link of the listing.

    Args:
        response: Scrapy response holding the product-list HTML.

    Yields:
        One Request per pagination link, handled by ``self.get_product_id``.
    """
    # Create the tables.
    Sql.ctl_tb_product_details()
    Sql.ctl_tb_cve_detail_list()

    # Collect the href of every listing page.
    document = BeautifulSoup(response.text, 'lxml')
    next_page_links = document.find_all(
        'a', title="Next Page-List of software or hardware products")
    for link in next_page_links:
        page_url = self.base_url + link['href']
        print(page_url)
        yield Request(page_url, self.get_product_id)
def parse(self, response):
    """Follow the per-year vulnerability-list links for each vulnerability type.

    Args:
        response: Scrapy response holding the page to scan for links.

    Yields:
        One Request per matching link whose URL contains ``'year-'``,
        handled by ``self.get_cve_details_page``.
    """
    Sql.ctl_tb_cve_detail_list()
    document = BeautifulSoup(response.text, 'lxml')

    for vul_type in self.type_list:
        print('##########' + vul_type)
        # Anchors are matched on both the list href and the type-specific title.
        matching_links = document.find_all(
            'a',
            href=re.compile("/vulnerability-list/vendor_id-26/product_id-"),
            title=re.compile(self.type_dict[vul_type]))
        for link in matching_links:
            url = self.base_url + link['href']
            if 'year-' not in url:
                continue
            print(url)
            yield Request(url, callback=self.get_cve_details_page)
def get_product_id(self, response):
    """Store every product found on a listing page and follow its product page.

    Each product anchor's href is split on ``'/'``: element 4 is used as the
    product id and element 5 as ``<name>.html?vendor_id=<id>``.

    Args:
        response: Scrapy response holding one product-list page.

    Yields:
        One Request per product, handled by ``self.get_product_url``.
    """
    document = BeautifulSoup(response.text, 'lxml')
    product_links = document.find_all(
        'a',
        href=re.compile("//www.cvedetails.com/product/"),
        title=re.compile("Product Details"))
    print('#####################################')

    for link in product_links:
        href = link['href']
        print(href)
        segments = href.split('/')
        product_id = segments[4]
        name_and_query = segments[5].split('?')
        product_name = Sql.sqliteEscape(
            name_and_query[0].replace('.html', ''))
        vendor_id = name_and_query[1].replace('vendor_id=', '')
        Sql.insert_product_details(product_id, product_name, vendor_id)

        product_url = self.base_url + '/product/' + product_id
        print('#####product_url:' + product_url)
        yield Request(product_url, self.get_product_url)
def read_excel_to_db(self):
    """Load the settings workbook and insert its data rows into the settings table.

    The first sheet is skipped. On every other sheet, row 1 is printed as a
    header and each later row supplies columns 1-7 (category, product_name,
    product_id, vendor_id, product_search, vendor_search, owner), converted
    with ``str()``, to ``Sql.insert_tb_settings``.
    """
    workbook = openpyxl.load_workbook(self.excelSettingsFileName)
    # Names of all sheets in the workbook.
    sheet_names = workbook.sheetnames
    print(sheet_names)

    for index, sheet_name in enumerate(sheet_names):
        if index == 0:
            continue
        sheet = workbook[sheet_name]
        print('\n\n第' + str(index) + '个sheet: ' + sheet.title + '->>>')

        for row in range(1, sheet.max_row + 1):
            if row == 1:
                header_cells = [
                    str(sheet.cell(row=row, column=col).value).ljust(17)
                    for col in range(1, sheet.max_column + 1)
                ]
                print('\n' + ''.join(header_cells))
                continue
            fields = [
                str(sheet.cell(row=row, column=col).value)
                for col in range(1, 8)
            ]
            Sql.insert_tb_settings(*fields)
def __init__(self, plugin_type):
    """Select the plugin directory and table for ``plugin_type`` and reset the table.

    Args:
        plugin_type: ``'topvas'`` or ``'nessus'``; any other value terminates
            the process through ``sys.exit``.
    """
    print('##Begin write plugins data for %s' % (plugin_type))

    if plugin_type == 'topvas':
        plugins_path = settings.Topvas_PATH
        plugins_table = 'nvts'
    elif plugin_type == 'nessus':
        plugins_path = settings.Nessus_PATH
        plugins_table = 'nvts_ness'
    else:
        sys.exit('plugin_type[%s] error!!!' % (plugin_type))

    self.plugins_path = plugins_path
    self.plugins_table = plugins_table
    # Presumably "create table" followed by "clear table" -- confirm in Sql.
    Sql.ctl_tb_plugins(self.plugins_table)
    Sql.cls_tb_plugins(self.plugins_table)
    self.count = 0
def save_Excel(self):
    """Write the rows returned by ``Sql.select_from_cve_report`` to an .xls workbook.

    Row 0 holds the column headers and each report row follows in order. The
    workbook is saved as ``self.excelFileName`` under the current working
    directory.
    """
    workbook = xlwt.Workbook()
    sheet = workbook.add_sheet(self.excelSheetName, cell_overwrite_ok=True)

    header = [
        'product_id', 'product_name', 'year', 'vul_type', 'cve',
        'openvas_file', 'openvas_exist', 'topvas_ness_file', 'nessus_file',
        'nessus_exist', 'ts_file', 'ts_count'
    ]
    self.WriteSheetRow(sheet, header, 0, True)

    for row_index, report_row in enumerate(Sql.select_from_cve_report(),
                                           start=1):
        self.WriteSheetRow(sheet, list(report_row), row_index, False)

    target_path = os.path.join(os.getcwd(), self.excelFileName)
    workbook.save(target_path)
def get_cve_details_url(self, response):
    """Extract name, year, vulnerability type and CVE ids from a list page.

    The year and type come from the query parameters of the first
    "Go to page" link. Every CVE found is also stored through
    ``Sql.insert_cve_detail_list``.

    Args:
        response: Scrapy response holding one vulnerability-list page.

    Returns:
        The populated CvedetailsItem.
    """
    item = CvedetailsItem()
    document = BeautifulSoup(response.text, 'lxml')

    # 1. name TEXT NOT NULL
    product_links = document.find_all(
        'a', href=re.compile("//www.cvedetails.com/product"))
    name = product_links[0].text
    item['name'] = name
    print('1.操作系统:' + item['name'])

    # 2. year TEXT NOT NULL
    pager_links = document.find_all('a', title=re.compile('Go to page '))
    query_params = pager_links[0]['href'].split('&')
    print(query_params)
    year = next(
        (param.split('=')[1] for param in query_params if 'year=' in param),
        '')
    item['year'] = year
    print('2.年份:' + item['year'])

    # 3. vul_type TEXT
    vul_type = next(
        (param.split('=')[0] for param in query_params
         if '=1' in param and 'op' in param),
        '')
    if not vul_type:
        print('#ERROR# vul_type is null')
        vul_type = 'of exploits'
    vul_type = self.type_dict_cn[vul_type]
    item['vul_type'] = vul_type
    print('3.漏洞类别:' + item['vul_type'])

    # 4. cve TEXT NOT NULL
    cve_links = document.find_all('a', href=re.compile("/cve/CVE"))
    print('4.cve:')
    item['cve'] = ','.join(link.text for link in cve_links)
    print('###' + item['cve'])

    for link in cve_links:
        Sql.insert_cve_detail_list(name, year, vul_type, link.text)
    return item
def __init__(self):
    """Run ``Sql.ctl_tb_cve_details`` when the object is constructed.

    NOTE(review): presumably this creates the cve_details table -- confirm
    against the Sql helper.
    """
    Sql.ctl_tb_cve_details()
def get_cve_details_url(self, response):
    """Extract product id/name, year, vulnerability type and CVE ids from a list page.

    Product id, year and type come from the query parameters of the first
    "Go to page" link. The product name is the text of the first product
    link, falling back to the first vendor link when no product link exists.
    Every CVE found is also stored through ``Sql.insert_cve_detail_list``.

    Args:
        response: Scrapy response holding one vulnerability-list page.

    Returns:
        The populated CvedetailsItem.
    """
    item = CvedetailsItem()
    document = BeautifulSoup(response.text, 'lxml')

    # 1. product_id TEXT NOT NULL
    pager_links = document.find_all('a', title=re.compile('Go to page '))
    query_params = pager_links[0]['href'].split('&')
    print(query_params)
    product_id = next(
        (param.split('=')[1] for param in query_params
         if 'product_id=' in param),
        '')
    item['product_id'] = product_id
    print('1.产品ID:' + item['product_id'])

    # 2. product_name TEXT NOT NULL
    name_links = document.find_all(
        'a', href=re.compile("//www.cvedetails.com/product"))
    if not name_links:
        name_links = document.find_all(
            'a', href=re.compile("//www.cvedetails.com/vendor"))
    product_name = Sql.sqliteEscape(name_links[0].text)
    item['product_name'] = product_name
    print('2.操作系统:' + item['product_name'])

    # 3. year TEXT NOT NULL
    year = next(
        (param.split('=')[1] for param in query_params if 'year=' in param),
        '')
    item['year'] = year
    print('3.年份:' + item['year'])

    # 4. vul_type TEXT
    vul_type = next(
        (param.split('=')[0] for param in query_params
         if '=1' in param and 'op' in param),
        '')
    if not vul_type:
        print('#ERROR# vul_type is null')
        vul_type = 'of exploits'
    vul_type = settings.TYPE_DICT_EN[vul_type]
    item['vul_type'] = vul_type
    print('4.漏洞类别:' + item['vul_type'])

    # 5. cve TEXT NOT NULL
    cve_links = document.find_all('a', href=re.compile("/cve/CVE"))
    print('5.cve:')
    item['cve'] = ','.join(link.text for link in cve_links)
    print('###' + item['cve'])

    for link in cve_links:
        Sql.insert_cve_detail_list(product_id, product_name, year, vul_type,
                                   link.text)
    return item
def __init__(self):
    """Set up the cve_details and cve_detail_list tables on construction.

    NOTE(review): the ``ctl_tb_*`` calls presumably create the tables and the
    ``clr_*`` calls presumably empty them -- confirm against the Sql helper.
    """
    Sql.ctl_tb_cve_details()
    Sql.ctl_tb_cve_detail_list()

    Sql.clr_cve_details()
    Sql.clr_cve_detail_list()
# 1. Read "cvedetails/plugins_data/excel/重点应用分类.xlsx" and store its
#    contents in table settings.
print('##STEP1:insert into table settings')
ExcelRead().read_excel_to_db()

# 2. Crawl cvedetails.com with scrapy, driven by the data in table settings.
print('##STEP2:start spider')
os.system('python main.py')

# 3. Load the topvas and nessus plugin data into tables nvts and nvts_ness.
print('##STEP3:get plugins data')
# The plugin import itself is currently disabled:
# topvas = PluginsWriteData('topvas')
# topvas.file_walk()
# ness = PluginsWriteData('nessus')
# ness.file_walk()
Sql.delete_XX_nasl()

# 4. Build the report into table cve_report.
print('##STEP4:insert into cve_report')
TopVAS().cve_report()

# 5. Write table cve_report out to an Excel file.
#    File name: "cvedetails/plugins_data/excel/插件移植报告.xls"
print('##STEP5:read cve_report and write to FILE cvedetails/plugins_data/excel/插件移植报告.xls')
excelProcess().save_Excel()

print('##ALL Finish!!!!')
def get_cve_details_url(self, response):
    """Extract product id/name, year, vulnerability type and CVE ids from a list page.

    Product id, year and type come from the query parameters of the first
    "Go to page" link. The product name is stored as ``vendor->product``,
    built from the "Details for" vendor link and the "Product Details"
    product link; when the vendor text is empty the leading ``->`` is
    removed. Every CVE found is also stored through
    ``Sql.insert_cve_detail_list``.

    Args:
        response: Scrapy response holding one vulnerability-list page.

    Returns:
        The populated CvedetailsItem, or None when neither a product nor a
        vendor name could be found on the page.
    """
    item = CvedetailsItem()
    soup = BeautifulSoup(response.text, 'lxml')

    # 1. product_id TEXT NOT NULL
    result_list = soup.find_all('a', title=re.compile('Go to page '))
    vul_type_list = result_list[0]['href'].split('&')
    print(vul_type_list)
    product_id = next(
        (value.split('=')[1] for value in vul_type_list
         if 'product_id=' in value),
        '')
    item['product_id'] = product_id
    print('1.产品ID:' + item['product_id'])

    # 2. product_name TEXT NOT NULL
    # Example product anchor:
    # <a href="//www.cvedetails.com/product/2002/Microsoft-.net-Framework.html?vendor_id=26"
    #    title="Product Details Microsoft .net Framework">.net Framework</a>
    product_link = soup.find(
        'a',
        href=re.compile("//www.cvedetails.com/product"),
        title=re.compile("Product Details"))
    product_txt = product_link.text if product_link is not None else ''

    # Example vendor anchor:
    # <a href="//www.cvedetails.com/vendor/26/Microsoft.html"
    #    title="Details for Microsoft">Microsoft</a>
    vendor_link = soup.find(
        'a',
        href=re.compile("//www.cvedetails.com/vendor"),
        title=re.compile("Details for"))
    vendor_txt = vendor_link.text if vendor_link is not None else ''

    if product_txt == '' and vendor_txt == '':
        print('#ERROR product null and vendor null')
        return None

    product_name_text = vendor_txt + '->' + product_txt
    # Comparing the single character at index 0 with '->' can never match,
    # so test the two-character prefix and strip both characters.
    if product_name_text.startswith('->'):
        product_name_text = product_name_text[len('->'):]
    product_name = Sql.sqliteEscape(product_name_text)
    item['product_name'] = product_name
    print('2.名字' + item['product_name'])

    # 3. year TEXT NOT NULL
    year = next(
        (value.split('=')[1] for value in vul_type_list if 'year=' in value),
        '')
    item['year'] = year
    print('3.年份:' + item['year'])

    # 4. vul_type TEXT
    vul_type = next(
        (value.split('=')[0] for value in vul_type_list
         if '=1' in value and 'op' in value),
        '')
    if vul_type == '':
        print('#ERROR# vul_type is null')
        vul_type = 'of exploits'
    vul_type = settings.TYPE_DICT_EN[vul_type]
    item['vul_type'] = vul_type
    print('4.漏洞类别:' + item['vul_type'])

    # 5. cve TEXT NOT NULL
    cve_list = soup.find_all('a', href=re.compile("/cve/CVE"))
    print('5.cve:')
    item['cve'] = ','.join(cve.text for cve in cve_list)
    print('###' + item['cve'])

    for cve in cve_list:
        Sql.insert_cve_detail_list(product_id, product_name, year, vul_type,
                                   cve.text)
    return item
def cve_report(self):
    """Build one cve_report row for every entry of the CVE detail list.

    For each CVE the report records:

    * ``openvas_file`` / ``openvas_exist``: the files returned by
      ``Sql.select_nvts_topvas_by_cve`` and whether there were any.
    * ``nessus_file`` / ``nessus_exist``: the files returned by
      ``Sql.select_nvts_ness_by_cve`` and whether there were any.
    * ``topvas_ness_file``: the ``ns_``-prefixed name of every nessus file
      for which ``Sql.select_nvts_by_file`` reports a match.
    * ``ts_file`` / ``ts_count``: when no openvas file exists but nessus
      files do, the nessus files that have no ``ns_`` counterpart.

    All multi-file columns are comma separated. After the last row
    ``Sql.drop_tb_nvts_nons`` is called to drop the nvts_nons table.
    """
    cve_detail_list = Sql.select_cve_detail_list()

    for progress, cve_info in enumerate(cve_detail_list, start=1):
        product_id = cve_info[0]
        product_name = cve_info[1]
        year = cve_info[2]
        vul_type = cve_info[3]
        cve = cve_info[4]

        # topvas
        openvas_files = [row[0] for row in Sql.select_nvts_topvas_by_cve(cve)]
        openvas_exist = 'yes' if openvas_files else 'no'
        openvas_file = ','.join(openvas_files)

        # nessus
        nessus_files = [row[0] for row in Sql.select_nvts_ness_by_cve(cve)]
        nessus_exist = 'yes' if nessus_files else 'no'
        nessus_file = ','.join(nessus_files)

        # Nessus files that have an 'ns_'-prefixed entry.
        ported_files = []
        for name in nessus_files:
            ret = Sql.select_nvts_by_file('ns_' + name)
            if ret[0] == 1:
                print('file:%s存在' % name)
                ported_files.append(name)
        topvas_ness_file = ','.join('ns_' + name for name in ported_files)

        # Nessus files without an 'ns_' counterpart, counted only when no
        # openvas file covers the CVE. Joining a list avoids the old
        # leading-comma stripping, which removed a real character when the
        # string had been copied from nessus_file.
        ts_files = []
        if openvas_exist == 'no' and nessus_exist == 'yes':
            ported_lookup = set(ported_files)
            ts_files = [
                name for name in nessus_files if name not in ported_lookup
            ]
        ts_count = len(ts_files)
        ts_file = ','.join(ts_files)

        # Write the report row.
        print('###progress:%d' % (progress))
        Sql.insert_cve_report(product_id, product_name, year, vul_type, cve,
                              openvas_file, openvas_exist, topvas_ness_file,
                              nessus_file, nessus_exist, ts_file, ts_count)

    # Drop table nvts_nons.
    Sql.drop_tb_nvts_nons()
def __init__(self):
    """Prepare the report table and the helper table/indexes on construction.

    NOTE(review): the helper names suggest "create table" (``ctl_tb_*``),
    "clear table" (``cls_tb_*``) and "create index" (``ctl_index_*``) --
    confirm against the Sql helper.
    """
    # Dropping the report table first is currently disabled:
    # Sql.drop_tb_cve_report()
    Sql.ctl_tb_cve_report()
    Sql.cls_tb_cve_report()

    Sql.ctl_index_nvts_ness()

    Sql.ctl_tb_nvts_nons()
    Sql.ctl_index_nvts_nons()
    Sql.insert_nvts_nons()
def Del_XX_nasl(self):
    """Delete the XX_*.nasl files by delegating to ``Sql.delete_XX_nasl``."""
    Sql.delete_XX_nasl()
def __init__(self):
    """Remember the settings workbook path and prepare the settings table.

    NOTE(review): ``ctl_tb_settings`` presumably creates the table;
    the purpose of ``ctr_tb_settings`` cannot be told from here -- confirm
    against the Sql helper.
    """
    self.excelSettingsFileName = settings.ExcelSettingsFileName

    Sql.ctl_tb_settings()
    Sql.ctr_tb_settings()