def parse(self, response):
    """Extract advisory rows from ``response`` and hand them to the exporters.

    Severity initials, advisory titles, CVE ids and "Affects" text are pulled
    out with XPath, the initials are expanded to full severity names, and the
    four columns are assembled into a DataFrame.  The frame is passed to
    ``out.outputAs`` only when ``ce.do_need_notify`` returns exactly ``True``;
    ``out.custom_output`` is called whenever ``args.argparsed.output_as`` is
    not equal to ``False``.

    Any exception raised while building or exporting the frame is printed
    instead of being propagated.
    """
    strong_text = '//*[@id="content"]/div/p/strong/text()'
    severity_initials = response.xpath(strong_text).re('^[LIMC]')
    # The pattern is applied without re.MULTILINE.
    advisory = response.xpath(strong_text).re(' .*')
    cve_id = response.xpath(
        '//*[@id="content"]/div/p[1]/a[contains(text(),"CVE")]/text()'
    ).extract()
    versions_Affected = response.xpath(
        '//p[contains(text(),"Affects")]/text()[1]').re(' .*')
    cli_args = args.argparsed

    # Expand each leading letter to its full severity name.
    severity_names = {
        'L': 'low',
        'I': 'important',
        'M': 'moderate',
        'C': 'critical',
    }
    impact_rating = [
        severity_names.get(initial, initial) for initial in severity_initials
    ]

    spider_name = ConnectersSpider.name
    start_urls = ConnectersSpider.start_urls
    # Compare against the previously written CSV for this spider.
    csv_path = "Output/" + spider_name + '.csv'
    changed = ce.do_need_notify(spider_name, csv_path, cve_id,
                                versions_Affected, advisory, start_urls)

    try:
        frame = pd.DataFrame(
            {
                'impact_rating': impact_rating,
                'Advisory': advisory,
                "CVE-Id": cve_id,
                "versions_Affected": versions_Affected,
            },
            columns=[
                'impact_rating', "CVE-Id", 'Advisory', "versions_Affected"
            ],
        )
        if changed is True:
            out.outputAs(spider_name, frame)
        # NOTE(review): the extra export is not gated on `changed` --
        # confirm that is the intended behaviour.
        extra_format = cli_args.output_as
        if extra_format != False:  # noqa: E712 - equality check kept as-is
            out.custom_output(extra_format, spider_name, frame)
    except Exception as err:
        print("type error: " + spider_name + " " + str(err))
def parse(self, response):
    """Collect CVE rows from ``response`` and pass them to the exporters.

    Severity initials, advisory names, CVE ids and the table cells whose text
    contains "2." are read with XPath.  The initials are expanded to full
    severity names and the four columns are combined into a DataFrame.
    ``out.outputAs`` receives the frame only when ``ce.do_need_notify``
    returns exactly ``True``; ``out.custom_output`` is called whenever
    ``args.argparsed.output_as`` is not equal to ``False``.

    Exceptions raised while building or exporting the frame are printed and
    swallowed.
    """
    severity_initials = response.xpath('//dt/h3/text()').re('^[lmin]')
    advisory = response.xpath('//name/text()').extract()
    cve_id = response.xpath('//h3/a/text()').extract()
    versions_Affected = response.xpath(
        '//tr/td[contains(text(), "2.")]/text()').extract()
    cli_args = args.argparsed

    # Lower-case initial -> full severity label.
    labels = {
        'l': 'low',
        'i': 'important',
        'm': 'moderate',
        'n': 'n/a',
    }
    impact_rating = []
    for initial in severity_initials:
        impact_rating.append(labels.get(initial, initial))

    spider_name = HttpdSpider.name
    start_urls = HttpdSpider.start_urls
    # Compare against the previously written CSV for this spider.
    changed = ce.do_need_notify(
        spider_name,
        "Output/" + spider_name + '.csv',
        cve_id,
        versions_Affected,
        advisory,
        start_urls,
    )

    try:
        column_order = [
            'impact_rating', "CVE-Id", 'Advisory', "versions_Affected"
        ]
        rows = {
            'impact_rating': impact_rating,
            'Advisory': advisory,
            "CVE-Id": cve_id,
            "versions_Affected": versions_Affected,
        }
        frame = pd.DataFrame(rows, columns=column_order)
        if changed is True:
            out.outputAs(spider_name, frame)
        # NOTE(review): the extra export is not gated on `changed` --
        # confirm that is the intended behaviour.
        extra_format = cli_args.output_as
        if extra_format != False:  # noqa: E712 - equality check kept as-is
            out.custom_output(extra_format, spider_name, frame)
    except Exception as err:
        print("type error: " + spider_name + " " + str(err))
def parse(self, response):
    """Extract vulnerability rows from ``response`` and export them.

    The impact rating, CVE id, advisory text, affected versions and fixed
    release are read with XPath / regular expressions and combined into a
    five-column DataFrame.  ``out.outputAs`` receives the frame only when
    ``ce.do_need_notify`` returns exactly ``True``; ``out.custom_output`` is
    called whenever ``args.argparsed.output_as`` is not equal to ``False``.

    Exceptions raised while building or exporting the frame are printed and
    swallowed.
    """
    # Raw strings keep the regex backslashes literal; the same patterns
    # written as plain literals are invalid escape sequences.
    impact_rating = response.xpath('//dl/dt/a[3]').re(r'.*?\#(.*)?"')
    cve_id = response.xpath(
        '//dt/a[contains(text(), "CVE-")]/text()').extract()
    advisory = response.xpath(
        '//*[@id="content"]/div/article/div/dl/dd/text()').extract()

    # Both version columns are cut out of the list items mentioning "1.0.2".
    release_items = '//dd/ul/li[contains(text(),"1.0.2")]'
    versions_affected = response.xpath(release_items).re(r'.*?ted(.*?)?\)')
    fixed_in_openssl = response.xpath(release_items).re(
        r'.*?L(.*?)?[\<|\(]')

    cli_args = args.argparsed
    spider_name = OpensslSpider.name
    start_urls = OpensslSpider.start_urls
    # Compare against the previously written CSV for this spider.
    changed = ce.do_need_notify(spider_name,
                                "Output/" + spider_name + '.csv', cve_id,
                                versions_affected, advisory, start_urls)

    try:
        frame = pd.DataFrame(
            {
                'Impact Rating': impact_rating,
                'Advisory': advisory,
                "CVE-Id": cve_id,
                "Versions Affected": versions_affected,
                'Fixed in OpenSSL': fixed_in_openssl,
            },
            columns=[
                "Impact Rating",
                "CVE-Id",
                "Advisory",
                "Versions Affected",
                "Fixed in OpenSSL",
            ],
        )
        if changed is True:
            out.outputAs(spider_name, frame)
        # NOTE(review): the extra export is not gated on `changed` --
        # confirm that is the intended behaviour.
        extra_format = cli_args.output_as
        if extra_format != False:  # noqa: E712 - equality check kept as-is
            out.custom_output(extra_format, spider_name, frame)
    except Exception as err:
        # Best-effort export: report the failure and carry on.
        print("type error: " + spider_name + " " + str(err))
def parse(self, response):
    """Read the advisory table in ``response`` and export it.

    Table cells 2 through 7 of every row are extracted column by column and
    assembled into a DataFrame.  ``out.outputAs`` receives the frame only
    when ``ce.do_need_notify`` returns exactly ``True``;
    ``out.custom_output`` is called whenever ``args.argparsed.output_as`` is
    not equal to ``False``.

    Exceptions raised while building or exporting the frame are printed and
    swallowed.
    """

    def cells(xpath):
        """Return every string matched by ``xpath`` in the response."""
        return response.xpath(xpath).extract()

    date = cells('//tr/td[3]/text()')
    advisory = cells('//tr/td[2]/a/text()')
    cve_id = cells('//tr/td[6]//text()')
    versions_affected = cells('//tr/td[4]/a/text()')
    affected_upto = cells('//tr/td[5]/a/text()')
    cwe = cells('//tr/td[7]/a/text()')

    cli_args = args.argparsed
    spider_name = CurlSpider.name
    start_urls = CurlSpider.start_urls
    # Compare against the previously written CSV for this spider.
    csv_path = "Output/" + spider_name + '.csv'
    changed = ce.do_need_notify(spider_name, csv_path, cve_id,
                                versions_affected, advisory, start_urls)

    try:
        rows = {
            "Date": date,
            "Advisory": advisory,
            "CVE-Id": cve_id,
            "versions_affected": versions_affected,
            "Affected_upto": affected_upto,
            "Cwe": cwe,
        }
        # NOTE(review): "Cwe" is supplied above but is absent from this
        # column list -- confirm whether it is meant to be exported.
        column_order = [
            "CVE-Id", "Advisory", "Date", "versions_affected", "Affected_upto"
        ]
        frame = pd.DataFrame(rows, columns=column_order)
        if changed is True:
            out.outputAs(spider_name, frame)
        # NOTE(review): the extra export is not gated on `changed` --
        # confirm that is the intended behaviour.
        extra_format = cli_args.output_as
        if extra_format != False:  # noqa: E712 - equality check kept as-is
            out.custom_output(extra_format, spider_name, frame)
    except Exception as err:
        print("type error: " + spider_name + " " + str(err))
def parse(self, response):
    """Extract advisory rows from ``response`` and export them.

    Advisory titles and CVE ids are read page-wide.  The "Affects" text is
    gathered section by section (one section per ``<h3>`` id, skipping the
    first two) so that a CVE without its own "Affects" paragraph gets the
    placeholder string ``"null"``.  The impact rating is the severity word
    each advisory title starts with, or ``'n/a'``.

    ``out.outputAs`` receives the resulting DataFrame only when
    ``ce.do_need_notify`` returns exactly ``True``; ``out.custom_output`` is
    called whenever ``args.argparsed.output_as`` is not equal to ``False``.
    Exceptions raised while building or exporting the frame are printed and
    swallowed.
    """
    advisory = response.xpath(
        '//*[@id="content"]/div/p/a/preceding-sibling::strong/text()'
    ).extract()
    cve_id = response.xpath(
        '//div/p/strong/following-sibling::a/text()').extract()
    header = response.xpath('//div/h3/@id').extract()
    cli_args = args.argparsed

    def first_affects(xpath):
        """Return the first string matched by ``xpath``, or ``"null"``."""
        hits = response.xpath(xpath).extract()
        return hits[0] if hits else "null"

    # Prefix of the XPath selecting the paragraph that links a given CVE,
    # e.g. (//*[@id="content"]/div/p[./a="CVE-2012-5568"]/...)
    cve_paragraph = '(//*[@id="content"]/div/p[./a="'

    versions_affected = []
    for section_id in header[2:]:
        # e.g. (//*[@id="Fixed_in_Apache_Tomcat_9.0.5"]/following-sibling::div[1]/...)
        section = '(//*[@id="' + section_id + '"]/following-sibling::div[1]'
        affects = response.xpath(
            section + '/p[contains(text(),"Affect")]/text())').extract()
        section_cves = response.xpath(
            section + '/p/strong/following-sibling::a/text())').extract()

        if len(affects) == len(section_cves):
            # One "Affects" paragraph per CVE: take them as they come.
            versions_affected.extend(affects)
        elif len(section_cves) == 1:
            versions_affected.append("null")
        elif section_cves:
            # Counts differ: look each "Affects" paragraph up relative to
            # the paragraph that links the CVE.
            for cve in section_cves[:-1]:
                versions_affected.append(first_affects(
                    cve_paragraph + cve +
                    '"]/preceding-sibling::p[1][contains(text(),"Affects")]/text())'
                ))
            # NOTE(review): the trailing lookup is anchored on the
            # second-to-last CVE (the index the loop above ends on) --
            # confirm this is the intended anchor.
            versions_affected.append(first_affects(
                cve_paragraph + section_cves[-2] +
                '"]/following-sibling::p[contains(text(),"Affects")]/text())'
            ))
        # A section without any CVE link contributes no rows; previously
        # this case read a loop index that had never been assigned.

    severities = ('Low', 'Moderate', 'Important', 'Critical')
    impact_rating = [
        next((level for level in severities if title.startswith(level)),
             'n/a')
        for title in advisory
    ]

    spider_name = Tomcat8Spider.name
    start_urls = Tomcat8Spider.start_urls
    # Compare against the previously written CSV for this spider.
    changed = ce.do_need_notify(spider_name,
                                "Output/" + spider_name + '.csv', cve_id,
                                versions_affected, advisory, start_urls)

    try:
        frame = pd.DataFrame(
            {
                'impact_rating': impact_rating,
                'Advisory': advisory,
                "CVE-Id": cve_id,
                "versions_Affected": versions_affected,
            },
            columns=[
                'impact_rating', 'CVE-Id', 'Advisory', 'versions_Affected'
            ],
        )
        if changed is True:
            out.outputAs(spider_name, frame)
        # NOTE(review): the extra export is not gated on `changed` --
        # confirm that is the intended behaviour.
        extra_format = cli_args.output_as
        if extra_format != False:  # noqa: E712 - equality check kept as-is
            out.custom_output(extra_format, spider_name, frame)
    except Exception as err:
        # Best-effort export: report the failure and carry on.
        print("type error: " + spider_name + " " + str(err))