class VIA4RefUpdater:
    """Download the VIA4 cross-reference feed and merge it into stored CVE entries.

    The feed is a JSON document keyed by CVE id; each matching CVE document in
    the database receives a ``via4`` field holding the feed's reference data.
    """

    def __init__(self, server=None, port=None, driver=None, force_update=False, cve_updated=True):
        # Working directory next to this module for the downloaded feed file.
        self.path = join(os.path.dirname(join(os.path.abspath(__file__))), 'data')
        self.last_year = datetime.datetime.now().year
        self.url = 'https://www.cve-search.org/feeds/via4.json'
        self.driver = driver
        self.force_update = force_update
        self.cve_updated = cve_updated
        if self.driver is None:
            self.driver = MongoDriver(server=server, port=port)
        if not self.driver.is_connected():
            self.driver.connect()
        Path(self.path).mkdir(parents=True, exist_ok=True)

    def update(self):
        """Fetch the VIA4 feed and update the database.

        Returns:
            True when the database was updated, False when the feed hash matches
            the previously stored one and ``force_update`` is off.
        """
        print('Starting VIA4 references updater...')
        json_file = join(self.path, self.url.rsplit('/', 1)[-1])
        json_content = requests.get(self.url).content
        json_hash = hashlib.sha256(json_content).hexdigest()
        with open(json_file, 'wb') as f:
            f.write(json_content)
        ignore = False
        try:
            ignore = json_hash == self.driver.get_info_via4()['hash']
        except Exception:  # narrowed from bare except: no previous hash recorded
            print(
                "Can't find hash of previous update. Updating nonetheless...")
        if ignore and not self.force_update:
            # BUGFIX: remove the already-downloaded feed instead of leaving it
            # behind on disk when the update is skipped.
            self._cleanup_files()
            return False
        self._update_db(json_file, json_hash)
        self._cleanup_files()
        return True

    def _update_db(self, json_file, json_hash):
        """Merge each feed entry into its CVE document, then store the feed hash."""
        with open(json_file) as f:
            data = json.load(f)
        keys = data['cves'].keys()
        with tqdm(total=len(keys)) as pbar:
            for key in keys:
                # Some feed keys are abbreviated 'VE-...'; normalize the id used
                # for the DB lookup, but keep the original key for the feed dict.
                # BUGFIX: the original rebound the loop variable before indexing
                # data['cves'], which would KeyError on any normalized key.
                cve_id = key.replace('VE', 'CVE', 1) if key.startswith('VE') else key
                details = list(self.driver.get_cve({'_id': cve_id}))
                if len(details) == 0:
                    # Feed references a CVE we don't have; skip it.
                    pbar.update(1)
                    continue
                details = details[0]
                details['via4'] = data['cves'][key]
                self.driver.write_details_cve(details)
                pbar.update(1)
        self.driver.write_info_via4(json_hash)

    def _cleanup_files(self):
        """Delete the downloaded feed file."""
        os.remove(join(self.path, self.url.rsplit('/', 1)[-1]))
class CrossReferenceUpdater:
    """Link the CVE and CAPEC collections in both directions via shared CWE ids."""

    def __init__(self, server=None, port=None, driver=None):
        self.path = join(os.path.dirname(join(os.path.abspath(__file__))), 'data')
        self.driver = driver
        if self.driver is None:
            self.driver = MongoDriver(server=server, port=port)
        if not self.driver.is_connected():
            self.driver.connect()

    def update_capec(self, force_update=False, capec_updated=True, cve_updated=True):
        """Cross-reference CVE and CAPEC entries.

        Stage 1 attaches to each CVE the CAPEC entries that share one of its CWE
        weaknesses; stage 2 attaches to each CAPEC the ids of CVEs referencing it.
        Entries already cross-referenced are skipped unless the source collection
        changed or ``force_update`` is set.
        """
        print('Starting crossreference updater for CAPEC entries...')
        cursor_cve = self.driver.get_cve({})
        cursor_capec = self.driver.get_capec({})
        count_cve = self.driver.get_collection('cve_details').count_documents({})
        count_capec = self.driver.get_collection('capec_details').count_documents({})
        with tqdm(desc='Stage 1', total=count_cve) as pbar:
            for item in cursor_cve:
                if 'capec' in item and not capec_updated and not force_update:
                    pbar.update(1)
                    continue
                # Collect the CVE's distinct CWE ids from its problemtype data.
                weaknesses = {
                    el['value']
                    for problem in item['cve']['problemtype']['problemtype_data']
                    for el in problem['description']
                    if el['value'].startswith('CWE')
                }
                # Map CAPEC id -> name directly from the weakness query results.
                # PERF: the original re-queried get_capec({'_id': ...}) once per
                # matched CAPEC just to fetch a name already in hand.
                capec_names = {}
                for weakness in weaknesses:
                    for capec in self.driver.get_capec({'weaknesses': weakness}):
                        capec_names[capec['id']] = capec['name']
                item['capec'] = [{'id': cid, 'name': name}
                                 for cid, name in capec_names.items()]
                self.driver.write_details_cve(item)
                pbar.update(1)
        with tqdm(desc='Stage 2', total=count_capec) as pbar:
            for item in cursor_capec:
                if 'cve' in item and not cve_updated and not force_update:
                    pbar.update(1)
                    continue
                cve_by_capec = self.driver.get_cve({'capec.id': item['id']})
                item['cve'] = list({el['_id'] for el in cve_by_capec})
                self.driver.write_entry_capec(item)
                pbar.update(1)

    def update_via4(self, force_update=True, cve_updated=True):
        """Delegate VIA4 reference updating to VIA4RefUpdater.

        NOTE(review): ``force_update`` defaults to True here, unlike the False
        default used by the other updaters — confirm this is intentional.
        """
        VIA4RefUpdater(driver=self.driver, force_update=force_update,
                       cve_updated=cve_updated).update()
class CVESearch:
    """High-level facade for querying and updating the CVE/CAPEC database."""

    def __init__(self, server=None, port=None):
        self.driver = MongoDriver(server, port)

    def update(self, force_update=False):
        """Run all updaters in dependency order: CAPEC, CVE, cross-refs, VIA4."""
        # Imported lazily to avoid a circular import at module load time.
        from cve_search.capec_updater import CAPECUpdater
        from cve_search.crossref_updater import CrossReferenceUpdater
        from cve_search.cve_updater import CVEUpdater
        capec_updated = CAPECUpdater(driver=self.driver, force_update=force_update).update()
        cve_updated = CVEUpdater(driver=self.driver, force_update=force_update).update()
        crossref = CrossReferenceUpdater(driver=self.driver)
        crossref.update_capec(force_update, capec_updated, cve_updated)
        crossref.update_via4(force_update, cve_updated)

    def query_cve(self, *argv):
        """Run a raw query against the CVE collection, connecting on demand."""
        self._connect()
        return self.driver.get_cve(*argv)

    def find_cve_by_id(self, cve_id):
        """Return the single CVE document whose _id matches ``cve_id``."""
        return list(self.query_cve({"_id": cve_id}))[0]

    def find_cve_by_capec(self, capec_id, extended_query=False):
        """Return CVE ids linked to a CAPEC entry, or full documents if extended."""
        cve_list = list(self.query_capec({"_id": capec_id}))[0]['cve']
        if extended_query:
            return self.query_cve({"$or": [{'_id': item} for item in cve_list]})
        return cve_list

    def get_all_cve(self):
        """Return a cursor over every CVE document."""
        return self.query_cve({}, {})

    def query_capec(self, *argv):
        """Run a raw query against the CAPEC collection, connecting on demand."""
        self._connect()
        return self.driver.get_capec(*argv)

    def find_capec_by_id(self, capec_id):
        """Return the single CAPEC document whose _id matches ``capec_id``."""
        return list(self.query_capec({"_id": capec_id}))[0]

    def get_all_capec(self):
        """Return a cursor over every CAPEC document."""
        return self.query_capec({}, {})

    def _connect(self):
        # Lazily open the DB connection the first time a query runs.
        if not self.driver.is_connected():
            self.driver.connect()

    def close(self):
        """Close the underlying database connection."""
        self.driver.close_connection()
class CVEUpdater:
    """Download NVD yearly JSON feeds and load CVE entries into the database."""

    def __init__(self, server=None, port=None, driver=None, force_update=False,
                 scrape_history=True, max_attemps_scraper=5, delay_scraper=0):
        self.path = join(os.path.dirname(join(os.path.abspath(__file__))), 'data')
        self.last_year = datetime.datetime.now().year
        # {0} = year, {1} = extension ('meta', 'json' or 'json.gz').
        self.url = 'https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-{0}.{1}'
        self.starting_year = 2002
        self.force_update = force_update
        self.driver = driver
        self.scraper = None
        if self.driver is None:
            self.driver = MongoDriver(server=server, port=port)
        if not self.driver.is_connected():
            self.driver.connect()
        if scrape_history:
            self.scraper = CVEChangelogScraper(max_attempts=max_attemps_scraper,
                                               delay_attempt=delay_scraper)
        Path(self.path).mkdir(parents=True, exist_ok=True)

    def update(self):
        """Update every yearly feed from starting_year to the current year.

        Returns:
            True when at least one year's data changed, False otherwise.
        """
        print("Starting CVE updater...")
        modified = False
        for year in range(self.starting_year, self.last_year + 1):
            meta_url = self.url.format(year, 'meta')
            meta_file = join(self.path, meta_url.rsplit('/', 1)[-1])
            meta_content = requests.get(meta_url).content
            meta_hash = hashlib.sha256(meta_content).hexdigest()
            try:
                ignore = meta_hash == self.driver.get_info_cve(year)['hash']
            except Exception:  # narrowed from bare except: no stored hash yet
                print("Can't find hash of previous update for year {}. Updating nonetheless...".format(year))
                ignore = False
            if ignore and not self.force_update:
                # BUGFIX: the original incremented `year` before printing, so
                # the skip message named the *next* year, not the skipped one.
                print("CVE Entries for year {} already updated. Skipping...".format(year))
                continue
            json_file_url = self.url.format(year, 'json.gz')
            json_gz_file = join(self.path, json_file_url.rsplit('/', 1)[-1])
            json_file = join(self.path, self.url.format(year, 'json').rsplit('/', 1)[-1])
            with open(json_gz_file, 'wb') as f:
                f.write(requests.get(json_file_url).content)
            # Decompress the .json.gz into a plain .json next to it.
            with gzip.open(json_gz_file, 'rb') as f_in:
                with open(json_file, 'wb') as f_out:
                    shutil.copyfileobj(f_in, f_out)
            # (Removed dead code: a `success` flag hard-coded to True made the
            # download-failure branch unreachable.)
            modified = True
            self._update_db(json_file, year, meta_hash)
            with open(meta_file, 'wb') as f:
                f.write(meta_content)
        self._cleanup_files()
        return modified

    def _cleanup_files(self):
        """Delete downloaded nvdcve feed artifacts (.json, .json.gz, .meta)."""
        for name in os.listdir(self.path):
            full = join(self.path, name)
            if (isfile(full) and name.startswith('nvdcve')
                    and name.endswith(('.json.gz', '.json', '.meta'))):
                os.remove(full)

    def _update_db(self, json_file, year, meta_hash):
        """Store the year's feed metadata, then each CVE entry with its changelog."""
        with open(json_file) as f:
            data = json.load(f)
        cve_entries = data['CVE_Items']
        # Everything except the entries themselves is per-year feed metadata.
        info = dict(data)
        del info['CVE_Items']
        self.driver.write_info_cve(info, year, meta_hash)
        with tqdm(desc="Year " + str(year), total=len(cve_entries)) as pbar:
            for entry in cve_entries:
                cve_id = entry['cve']['CVE_data_meta']['ID']
                description = entry['cve']['description']['description_data'][0]['value']
                entry['history'] = {}
                # Rejected CVEs have no changelog worth scraping.
                if self.scraper is not None and not description.startswith('** REJECT **'):
                    history = self.scraper.get_history(cve_id)
                    if history is not None:
                        entry['history'] = history
                self.driver.write_details_cve(entry)
                pbar.update(1)
        print('CVE Entries for year {} updated successfully'.format(year))
class CAPECUpdater:
    """Download the CAPEC XML catalog and load attack patterns into the database."""

    def __init__(self, server=None, port=None, driver=None, force_update=False):
        self.path = join(os.path.dirname(join(os.path.abspath(__file__))), 'data')
        self.last_year = datetime.datetime.now().year
        self.url = 'https://capec.mitre.org/data/xml/capec_v3.2.xml'
        self.driver = driver
        self.force_update = force_update
        if self.driver is None:
            self.driver = MongoDriver(server=server, port=port)
        if not self.driver.is_connected():
            self.driver.connect()
        Path(self.path).mkdir(parents=True, exist_ok=True)

    def update(self):
        """Fetch the catalog and update the database.

        Returns:
            True when the database was updated, False when the catalog hash
            matches the previously stored one and ``force_update`` is off.
        """
        print("Starting CAPEC updater...")
        xml_file = join(self.path, self.url.rsplit('/', 1)[-1])
        xml_content = requests.get(self.url).content
        xml_hash = hashlib.sha256(xml_content).hexdigest()
        try:
            if xml_hash == self.driver.get_info_capec()['hash'] and not self.force_update:
                print("CAPEC already updated. Aborting...")
                return False
        except Exception:  # narrowed from bare except: no previous hash recorded
            print(
                "Can't find hash of previous CAPEC update. Updating nonetheless..."
            )
        with open(xml_file, 'wb') as f:
            f.write(xml_content)
        self._update_db(xml_file, xml_hash)
        self._cleanup_files()
        return True

    def _update_db(self, xml_file, xml_hash):
        """Parse the CAPEC XML and write one document per attack pattern."""
        with open(xml_file) as f:
            data = xmltodict.parse(f.read())
        patterns = data['Attack_Pattern_Catalog']['Attack_Patterns']['Attack_Pattern']
        # Iterate directly instead of the original manual while/index loop.
        for pattern in patterns:
            prerequisites = []
            mitigations = []
            consequences = []
            related_cwe = []
            id_capec = "CAPEC-" + pattern['@ID']
            if 'Prerequisites' in pattern:
                prerequisites = pattern['Prerequisites']['Prerequisite']
                # A single prerequisite parses as a scalar; normalize to a list.
                if not isinstance(prerequisites, list):
                    prerequisites = [prerequisites]
            if 'Mitigations' in pattern:
                mitigations = pattern['Mitigations']['Mitigation']
                # A single mitigation parses as a dict wrapping xhtml paragraphs.
                if isinstance(mitigations, dict):
                    mitigations = mitigations['xhtml:p']
            likelihood = pattern.get('Likelihood_Of_Attack')
            typical_severity = pattern.get('Typical_Severity')
            if 'Consequences' in pattern:
                consequences = pattern['Consequences']['Consequence']
                if isinstance(consequences, list):
                    consequences = [dict(item) for item in consequences]
                else:
                    consequences = dict(consequences)
            if 'Related_Weaknesses' in pattern:
                related = pattern['Related_Weaknesses']['Related_Weakness']
                if isinstance(related, dict):
                    related = [related]
                related_cwe = ['CWE-' + item['@CWE_ID'] for item in related]
            info = {
                'id': id_capec,
                'name': pattern['@Name'],
                'description': pattern['Description'],
                'likelihood': likelihood,
                'typical_severity': typical_severity,
                'prerequisites': prerequisites,
                'mitigations': mitigations,
                'consequences': consequences,
                'weaknesses': related_cwe,
            }
            self.driver.write_entry_capec(info)
        self.driver.write_info_capec(xml_hash)

    def _cleanup_files(self):
        """Delete the downloaded CAPEC XML file."""
        os.remove(join(self.path, self.url.rsplit('/', 1)[-1]))