def parse_edb(raw_content, report_link):
    """Parse an Exploit-DB exploit page into a report dict.

    Args:
        raw_content: raw HTML of the exploit page.
        report_link: URL of the exploit page (``.../exploits/<id>``).

    Returns:
        dict with keys ``cve_id`` (list of 'CVE-<digits>-<digits>' strings),
        ``title`` and ``content``; empty dict on failure.
    """
    tree = html.fromstring(raw_content)
    keyword_section = tree.xpath(
        '/html/body/div/div[2]/div[2]/div/div/div[1]/div/div[1]/h1/text()')
    if len(keyword_section) > 0:
        title = keyword_section[0].replace('\n', ' ')
        cve_line = tree.xpath(
            '/html/body/div/div[2]/div[2]/div/div/div[1]/div/div[2]/div[1]/'
            'div[1]/div/div[1]/div/div/div/div[2]/h6/a/text()')
        # The raw-text version of the exploit lives under /raw/ instead of /exploits/.
        link = report_link.replace('/exploits/', '/raw/')
        page = requests.get(link, timeout=60,
                            headers={'User-Agent': "Magic Browser"})
        # crawl_content_from_link acts as a reachability/sanity gate; its
        # text is not used because the raw endpoint below is authoritative.
        crawl_result = crawl_content_from_link(link)
        if crawl_result is None:
            return {}
        # BUGFIX: the original used str(page.content), which produces a
        # bytes repr ("b'...\\r\\n...'") where '\r\n' never occurs literally,
        # so the replace was a no-op and repr junk leaked into the content.
        # page.text decodes the body properly.
        str_content = page.text.replace('\r\n', ' ')
        cve_ids = []
        if len(cve_line) > 0:
            token = encode_content(cve_line[0])
            # A valid suffix is non-empty and made only of digits and dashes
            # (e.g. '2021-1234'). BUGFIX: the original returned ['CVE-'] when
            # validation failed; now an invalid token yields an empty list.
            if token and all(c.isdigit() or c == '-' for c in token):
                cve_ids = ['CVE-' + token]
        return {'cve_id': cve_ids,
                'title': encode_content(title),
                'content': encode_content(str_content)}
    print('ERROR in parse_edb')
    return {}
def parse_securitytracker(raw_content, clean_content):
    """Parse a SecurityTracker advisory into a report dict.

    Args:
        raw_content: raw HTML of the advisory page.
        clean_content: plain-text rendering of the same page.

    Returns:
        dict with keys ``cve_id``, ``title`` and ``content``;
        empty dict on failure.
    """
    content = str(raw_content)
    # Title sits between <title> and </title>; the trailing 18 characters
    # (a fixed site suffix) are stripped off.
    keyword_section = content[content.find('<title>') + 7:
                              content.find('</title>') - 18].replace('\n', ' ')
    if len(keyword_section) > 0:
        cve_id_list = []
        # BUGFIX: str_content was only assigned inside the conditionals
        # below; if neither section was found, the return statement raised
        # UnboundLocalError. Default to the empty string instead.
        str_content = ''
        # CVE id appears between these two fixed markers in the plain text.
        start_loc = clean_content.find('CVE Reference:')
        end_loc = clean_content.find('(Links to External Site)')
        if start_loc < end_loc and start_loc != -1 and end_loc != -1:
            str_content = clean_content[start_loc:end_loc]
            matched_cve = regex_cve(str_content)
            if len(matched_cve) > 0:
                cve_id_list.append(matched_cve[0])
        # The affected-versions section is used as the report content when
        # present; otherwise the CVE-reference slice (or '') is kept.
        start_loc = clean_content.find('Version(s):')
        end_loc = clean_content.find('Description:')
        if start_loc < end_loc and start_loc != -1 and end_loc != -1:
            str_content = clean_content[start_loc + len('Version(s):'):end_loc]
        return {
            'cve_id': cve_id_list,
            'title': encode_content(keyword_section),
            'content': encode_content(str_content)
        }
    print('ERROR in parse_securitytracker')
    return {}
def parse_securityfocus_forum(raw_content, clean_content):
    """Parse a SecurityFocus forum thread into a report dict.

    Args:
        raw_content: raw HTML of the forum page.
        clean_content: plain-text rendering, scanned for CVE ids.

    Returns:
        dict with keys ``cve_id``, ``title`` and ``content``;
        empty dict on failure.
    """
    tree = html.fromstring(raw_content)
    headings = tree.xpath('//*[@id="comments"]/div/a/text()')
    if not headings:
        print('ERROR in parse_securityfocus_forum')
        return {}
    title = headings[0].replace('\n', ' ')
    # Flatten every comment body into one newline-free string.
    body_parts = tree.xpath('//*[@id="comments"]/div/div/text()')
    body = ''.join(part.replace('\n', ' ') for part in body_parts)
    return {
        'cve_id': get_cve_id_list(title, clean_content),
        'title': encode_content(title),
        'content': encode_content(body)
    }
def parse_openwall(raw_content, clean_content):
    """Parse an Openwall (oss-security) mail archive page into a report dict.

    Args:
        raw_content: raw HTML of the archive page (message is in a <pre>).
        clean_content: plain-text rendering, scanned for CVE ids.

    Returns:
        dict with keys ``cve_id``, ``title`` and ``content``;
        empty dict on failure.
    """
    tree = html.fromstring(raw_content)
    pre_text = tree.xpath('/html/body/pre/text()')
    if len(pre_text) >= 1:
        first_chunk = pre_text[0]
        subject_start = first_chunk.find('Subject:')
        if subject_start != -1:
            # The subject header ends at the first blank line after it.
            subject_end = first_chunk.find('\n\n', subject_start)
            title = first_chunk[subject_start:subject_end].replace('\n', ' ')
            # Flatten the whole message, dropping quote markers ('>\n')
            # before plain newlines so quoted lines collapse cleanly.
            body = ''.join(
                chunk.replace('>\n', ' ').replace('\n', ' ')
                for chunk in tree.xpath('/html/body/pre/text()'))
            return {
                'cve_id': get_cve_id_list(title, clean_content),
                'title': encode_content(title),
                'content': encode_content(body)
            }
    print('ERROR in parse_openwall')
    return {}
def parse_securityfocus_official(raw_content):
    """Parse a SecurityFocus official vulnerability page into a report dict.

    Args:
        raw_content: raw HTML of the vulnerability page.

    Returns:
        dict with keys ``cve_id``, ``title`` and ``content``;
        empty dict on failure.
    """
    tree = html.fromstring(raw_content)
    heading = tree.xpath('//*[@id="vulnerability"]/span//text()')
    if not heading:
        print('ERROR in parse_securityfocus_official')
        return {}
    title = heading[0]
    # Row 9 of the details table holds the free-text description.
    body = ''.join(
        tree.xpath('//*[@id="vulnerability"]/table/tr[9]/td[2]/text()'))
    # Row 3 holds the CVE references, one or more per text node.
    cve_ids = []
    for line in tree.xpath('//*[@id="vulnerability"]/table/tr[3]/td[2]/text()'):
        hits = regex_cve(line)
        if hits:
            cve_ids.append(hits[0])
    return {
        'cve_id': cve_ids,
        'title': encode_content(title),
        'content': encode_content(body)
    }