def main():
    """Extract QVM records from source blobs, enrich them with KB QID data,
    and write the results as JSON lines to the target bucket.

    Reads every blob under KB_SOURCE_BLOB to build a QID lookup, then streams
    each SOURCE_BLOB line through extract_details(), writing each resulting
    record to a local temp file that is finally uploaded to the target bucket.
    Progress is tracked through an Auditor record. Any parse/write failure
    aborts the whole run with a non-zero exit status.
    """
    # keep a record of key data items so we can log what we've done
    auditor = Auditor(JOB_NAME, r'../../config/va_auditor.yaml')
    auditor.commencement_time = datetime.datetime.today()

    # make sure the temp file isn't there from a previous run
    if os.path.exists(TEMP_FILE):
        os.remove(TEMP_FILE)

    # get details of qids from KB files
    qids = {}
    for blob in dt.get_list_of_blobs(SOURCE_PROJECT, SOURCE_BUCKET,
                                     KB_SOURCE_BLOB):
        for line in dt.read_blob_lines(SOURCE_PROJECT, SOURCE_BUCKET,
                                       blob.name):
            try:
                qids.update(extract_kb(line))
            except Exception as ex:
                print(f"Exception in processing KB entries: {ex}")
                print(line)
                # exit non-zero so schedulers/pipelines see the failure
                # (bare sys.exit() would report success with status 0)
                sys.exit(1)

    # main loop
    for blob in dt.get_list_of_blobs(SOURCE_PROJECT, SOURCE_BUCKET,
                                     SOURCE_BLOB):
        for line in dt.read_blob_lines(SOURCE_PROJECT, SOURCE_BUCKET,
                                       blob.name):
            auditor.records_read += 1
            try:
                records = extract_details(line, qids)
            except Exception as ex:
                print(f"Exception in extracting QVM record: {ex}")
                print(line)
                sys.exit(1)
            for record in records:
                try:
                    dt.write_json_line(record, TEMP_FILE)
                    auditor.records_written += 1
                except Exception as ex:
                    print(f"Exception in writing json line: {ex}")
                    print(record)
                    sys.exit(1)

    dt.save_file_to_bucket(TEMP_FILE, TARGET_PROJECT, TARGET_BUCKET,
                           TARGET_BLOB)

    # clean up the temp file
    if os.path.exists(TEMP_FILE):
        os.remove(TEMP_FILE)

    auditor.completion_time = datetime.datetime.today()
    auditor.log_event()
def main(run_date):
    """Enrich VM findings for the given run date with CMDB graph data and
    publish the merged records to the target bucket."""
    # audit the run so key data items get logged on completion
    with Auditor(data_set=JOB_NAME) as auditor:
        cmdb_graph = load_cmdb_graph(JOB_NAME, SOURCE_PROJECT, SOURCE_BUCKET,
                                     CMDB_GRAPH_BLOB)

        # temp file for the output; the auditor tracks records written to it
        temp_file = dt.temp_file(JOB_NAME, auditor)

        # select only the blobs matching this run's date
        findings_pattern = (VM_FINDINGS_BLOB + '.*' +
                            run_date.strftime('%Y-%m-%d'))

        # main processing loop: merge each finding with its CMDB record
        for blob in dt.get_list_of_blobs(SOURCE_PROJECT, SOURCE_BUCKET,
                                         findings_pattern):
            print(blob.name)
            for raw_line in dt.read_blob_lines(SOURCE_PROJECT, SOURCE_BUCKET,
                                               blob.name):
                auditor.records_read += 1
                finding = json.loads(raw_line)
                enrichment = find_record_in_graph(cmdb_graph,
                                                  finding.get('IP'),
                                                  finding.get('NETBIOS'))
                temp_file.write_json_line({**finding, **enrichment})

        # substitute the run date into the target blob name and upload
        dated_blob_name = run_date.strftime(
            TARGET_BLOB.replace('%date', '%Y-%m-%d'))
        temp_file.save_to_bucket(TARGET_PROJECT, TARGET_BUCKET,
                                 dated_blob_name)
def main(run_date):
    """Group the CVEs found in the source blobs by QID and write one
    {"QID": ..., "CVES": [...]} JSON line per QID to the target bucket."""
    # audit the run so key data items get logged on completion
    with Auditor(JOB_NAME, r'../../config/va_auditor.yaml') as auditor:
        # temp file for the output records (auditor tracks writes)
        temp_file = dt.temp_file(JOB_NAME, auditor)
        cves_by_qid = {}

        # only read blobs belonging to this run's date
        source_pattern = (SOURCE_BLOB + '.*' +
                          datetime.date.strftime(run_date, '%Y-%m-%d'))

        # accumulate every CVE under its QID, preserving first-seen order
        for blob in dt.get_list_of_blobs(SOURCE_PROJECT, SOURCE_BUCKET,
                                         source_pattern):
            for line in dt.read_blob_lines(SOURCE_PROJECT, SOURCE_BUCKET,
                                           blob.name):
                qid, cve = extract_details(line)
                cves_by_qid.setdefault(qid, []).append(cve)

        # emit one line per QID with its collected CVE list
        for qid, cves in cves_by_qid.items():
            temp_file.write_json_line({"QID": qid, "CVES": cves})

        # substitute the run date into the target blob name and upload
        dated_blob_name = run_date.strftime(
            TARGET_BLOB.replace('%date', '%Y-%m-%d'))
        temp_file.save_to_bucket(TARGET_PROJECT, TARGET_BUCKET,
                                 dated_blob_name)
def main(run_date):
    """Build the 'QVM all' vulnerability report for *run_date*.

    Loads three lookup data sets (CVE summaries keyed by CVE, QID summaries
    keyed by QID, and the many-to-many QID->CVE map), then streams the asset
    findings for the run date, enriches each finding with CVE/QID data plus a
    triage rating, and writes the result as JSON lines to the target bucket.

    :param run_date: date whose blobs are selected (formatted as %Y-%m-%d
        into the source patterns and the target blob name).
    """
    # keep a record of key data items so we can log what we've done
    auditor = Auditor(JOB_NAME, r'../../config/va_auditor.yaml')
    auditor.commencement_time = datetime.datetime.today()
    # set up a temp file for saving to
    # set the auditor to automatically track the written records
    temp_file = dt.temp_file(JOB_NAME, auditor)
    # Create QVM all report.
    # Takes in CVE summary (CVEId, CVSS data, QID, MFL/Exploit data). Key by CVEId
    # Takes in QID-CVE map. Can search by CVE or QID, many->many relationship
    # Takes in Asset findings (QVM == Qualys machine scan results, along with CMDB data. Key by QID, IP Address
    # Takes in Qualys descriptions and such like). Key by IP address
    # Once all data available, create triage rating based upon OLD triage algo and add. Then output as CSV(?)
    # Generator across Asset findings (each will have an IP, some CMDB data and a QID). Then get CVE from QIDCVEMap to get
    # CVE summary data. Get triage based upon compounded data from Triage subroutine, and add any QID description needed.
    # Then output as csv (possibly? Still to do...)

    # Get CVE summary data, indexed by CVE id for O(1) lookup later
    CVESummaries = {}
    for blob in dt.get_list_of_blobs(
            SOURCE_PROJECT, SOURCE_BUCKET, CVE_SUMMARY_SOURCE_BLOB_PATH +
            '.*' + datetime.datetime.strftime(run_date, '%Y-%m-%d')):
        for line in dt.read_blob_lines(SOURCE_PROJECT, SOURCE_BUCKET,
                                       blob.name):
            data_record = json.loads(line)
            CVESummaries[data_record['CVE']] = data_record

    # Likewise QID summaries (will have the QID verbose description on it)
    QIDSummaries = {}
    for blob in dt.get_list_of_blobs(
            SOURCE_PROJECT, SOURCE_BUCKET, QID_SUMMARY_SOURCE_BLOB_PATH +
            '.*' + datetime.datetime.strftime(run_date, '%Y-%m-%d')):
        for line in dt.read_blob_lines(SOURCE_PROJECT, SOURCE_BUCKET,
                                       blob.name):
            data_record = json.loads(line)
            QIDSummaries[data_record['QID']] = data_record

    # And finally likewise the QID -> CVE map data. This is many <-> many, so collect it as sets of CVE Ids
    # which are keyed by the QID in question, as it will be searched by QID.
    CVEsForAllQIDs = {}
    for blob in dt.get_list_of_blobs(
            SOURCE_PROJECT, SOURCE_BUCKET, QID_CVE_SOURCE_BLOB_PATH + '.*' +
            datetime.datetime.strftime(run_date, '%Y-%m-%d')):
        for line in dt.read_blob_lines(SOURCE_PROJECT, SOURCE_BUCKET,
                                       blob.name):
            data_record = json.loads(line)
            if data_record['QID'] in CVEsForAllQIDs:
                # Add to existing set
                CVEsForAllQIDs[data_record['QID']].add(data_record['CVE'])
            else:
                # New item on dict creating a new set
                CVEsForAllQIDs[data_record['QID']] = {data_record['CVE']}

    # Now, parse the whole finding set retrieving the enrichment data from the existing indices
    for blob in dt.get_list_of_blobs(
            SOURCE_PROJECT, SOURCE_BUCKET, ASSET_FINDINGS_SOURCE_BLOB_PATH +
            '.*' + datetime.datetime.strftime(run_date, '%Y-%m-%d')):
        for line in dt.read_blob_lines(SOURCE_PROJECT, SOURCE_BUCKET,
                                       blob.name):
            finding = json.loads(line)
            # Do some column renames where appropriate to match VSM reporting names
            # NOTE(review): pop() raises KeyError if QID/SEVERITY is absent —
            # presumably every finding carries both; confirm upstream schema.
            finding['VulnID'] = finding.pop('QID')
            finding['ScanScore'] = finding.pop('SEVERITY')
            # Production check: any ENVIRONMENT value starting 'PROD' (case-insensitive)
            if 'ENVIRONMENT' in finding and not finding[
                    'ENVIRONMENT'] is None and finding['ENVIRONMENT'].upper(
                    )[:4] == 'PROD':
                serverIsProduction = True
            else:
                serverIsProduction = False
            if 'CBP' in finding:
                CBP = getMaxCBP(finding['CBP'])
                # Homogenise the values
                if 'NONE' in CBP.upper():
                    CBP = ''
            else:
                CBP = ''  # Presumes no CBP if no data returned. May need to revisit
            # Return the CBP value to the findings dict so that its duplicates are eliminated
            finding['CBP'] = CBP
            # Add various keys that are missing in some cases with empty values to the
            # finding so that the output data is consistent in the fields it presents
            if not 'PORT' in finding or finding['PORT'] is None:
                finding['PORT'] = ''
            if not 'SOX' in finding or finding['SOX'] is None:
                finding['SOX'] = 'false'
            if not 'STEWARD' in finding or finding['STEWARD'] is None:
                finding['STEWARD'] = ''
            if not 'CMDB_OS' in finding or finding['CMDB_OS'] is None:
                finding['CMDB_OS'] = ''
            if not 'CMDB_OS_VERSION' in finding or finding[
                    'CMDB_OS_VERSION'] is None:
                finding['CMDB_OS_VERSION'] = ''
            # Retrieve the QID summary for the finding
            if finding['VulnID'] in QIDSummaries:
                qidSummary = QIDSummaries[finding['VulnID']]
            else:
                # Got a QID with no summary, so build a dummy one. Should really not happen.
                qidSummary = {
                    'QID': finding['VulnID'],
                    'Patchable': 'Unknown',
                    'Published_Date': 'Unknown',
                    'baseScore': 0,
                    'availabilityImpact': 'NONE',
                    'confidentialityImpact': 'NONE',
                    'integrityImpact': 'NONE',
                    'VulnerabilityName': '',
                    'Category': '',
                    'Solution': '',
                    'VendorNotes': ''
                }
            # Get all the CVEs associated with the finding (may be more than one)
            if finding['VulnID'] in CVEsForAllQIDs:
                # Code to generate triage based upon matching CVE data
                CVEIdsForQID = CVEsForAllQIDs[finding['VulnID']]
                # Get all the summaries. The odd selector is Dictionary Comprehension syntax and can be read as
                # 'Create a new dictionary (keys:values) based on the keys and values from CVESummaries if the key for
                # an entry in CVESummaries is in CVEsForQID'
                CVESummariesForQID = {
                    k: v
                    for (k, v) in CVESummaries.items() if k in CVEIdsForQID
                }
                # Get a single line rollup of all the CVE data for the QID that can then be used for both triage and return data.
                cveSummaryForQID = CVESummaryForQID(CVESummariesForQID)
                # The triage will rely on the highest/worst values for any of the CVEs returned, so pass the generator for those into
                # a routine to derive that.
                TriageString = Triage(cveSummaryForQID['MFL'],
                                      cveSummaryForQID['BaseScore'],
                                      cveSummaryForQID['Exploit_Known'],
                                      cveSummaryForQID['UserInteraction'],
                                      serverIsProduction, CBP,
                                      cveSummaryForQID['Confidentiality'],
                                      cveSummaryForQID['Integrity'],
                                      cveSummaryForQID['Availability'])
                # Finally, bundle the whole lot together as a dict out output data.
                data_out = dict(finding,
                                **cveSummaryForQID)  # concatenates these dicts
            else:
                # QID has no matching CVE/CVSS data. Generate triage based off Qualys data. TODO Find correct Algo for this
                # Prepare a dict to look like the CVSS one. Score and vectors are taken from the QID summary
                # UI is presumed to be false, as this data is not available for QID findings (and QID findings tend
                # to be stuff like unpatched software which require no UI anyway)
                fakeCVESummary = {
                    'CVE': '',
                    'Confidentiality':
                    qidSummary['confidentialityImpact'].upper(),
                    'Integrity': qidSummary['integrityImpact'].upper(),
                    'Availability': qidSummary['availabilityImpact'].upper(),
                    'UserInteraction': False,
                    'BaseScore': float(qidSummary['baseScore']),
                    'MFL': False,
                    'Exploit_Known': False,
                    'MFLCVEs': '',
                    'MFLCount': 0
                }
                # Prepare a Triage string based upon the QID data as loaded into the fake CVE summary above
                TriageString = Triage(fakeCVESummary['MFL'],
                                      fakeCVESummary['BaseScore'],
                                      fakeCVESummary['Exploit_Known'],
                                      fakeCVESummary['UserInteraction'],
                                      serverIsProduction, CBP,
                                      fakeCVESummary['Confidentiality'],
                                      fakeCVESummary['Integrity'],
                                      fakeCVESummary['Availability'])
                # And create the reportLine much as before
                data_out = dict(finding,
                                **fakeCVESummary)  # concatenates these dicts
            # Add QIDSummary data to the output
            data_out['Patchable'] = qidSummary[
                'Patchable']  # Add the required fields from the QID summary
            data_out['Published_Date'] = qidSummary['Published_Date']
            # `or ''` converts both missing keys and None values to empty strings
            data_out['VulnerabilityName'] = qidSummary.get(
                'VulnerabilityName') or ''
            data_out['Category'] = qidSummary.get('Category') or ''
            data_out['Solution'] = qidSummary.get('Solution') or ''
            data_out['VendorReferences'] = qidSummary.get(
                'VendorReferences') or ''
            # Add the triage string
            data_out[
                'TriagedRating'] = TriageString  # Adds the triaged value to the return dict
            # Derive the ScanType from the supplied ASSET_TYPE if it is present
            if not 'ASSET_TYPE' in finding or finding['ASSET_TYPE'] is None:
                data_out[
                    'ScanType'] = ''  # Don't set this if there is no ASSET_TYPE. May change.
            elif finding['ASSET_TYPE'] == 'server':
                data_out['ScanType'] = 'I'  # Internal
            elif finding['ASSET_TYPE'] == 'workstation':
                data_out['ScanType'] = 'E'  # Endpoint
            else:
                data_out[
                    'ScanType'] = ''  # Should never be hit, but assures that a value of some sort is returned
            # Add the derived date-based data
            # NOTE(review): timestamps use naive local now() with a 'Z' suffix —
            # looks like this expects the host clock to be UTC; confirm.
            data_out['ReportDate'] = datetime.datetime.now().strftime(
                '%Y-%m-%dT%H:%M:%SZ')
            data_out['Cycle'] = datetime.datetime.now().strftime('%m %Y')
            firstFoundDate = datetime.datetime.strptime(
                finding['FIRST_FOUND_DATETIME'], '%Y-%m-%dT%H:%M:%SZ')
            delta = datetime.datetime.now() - firstFoundDate
            data_out['DaysSinceFirstFound'] = delta.days
            # Remediation deadline scales with triage severity
            if 'High' in TriageString:
                targetRemediationDate = firstFoundDate + timedelta(weeks=4)
            elif 'Medium' in TriageString:
                targetRemediationDate = firstFoundDate + timedelta(
                    days=183
                )  # 6 months is a variable time. Pick a good approximation
            else:  # Low
                targetRemediationDate = firstFoundDate + timedelta(
                    days=365
                )  # as is one year (think leap years). Again, approximate
            data_out['RemediationDue'] = targetRemediationDate.strftime(
                '%Y-%m-%dT%H:%M:%SZ')
            data_out[
                'TargetBreached'] = targetRemediationDate < datetime.datetime.now(
                )
            # Other fields
            # NOTE(review): '+' assumes ID and VulnID are both strings — confirm
            data_out['Concat'] = finding['ID'] + '-' + finding['VulnID']
            # Write out line to temp file (calls json.dumps to write string out)
            temp_file.write_json_line(data_out)

    # finally write out the temp file to the bucket after incorporating the run_date
    preFormat = TARGET_BLOB.replace('%date', '%Y-%m-%d')
    destinationFile = run_date.strftime(preFormat)
    temp_file.save_to_bucket(TARGET_PROJECT, TARGET_BUCKET, destinationFile)
    # No need to explicitly remove the local file. temp_file class has a destructor that will do that.
    temp_file = None
    auditor.completion_time = datetime.datetime.today()
    auditor.log_event()
def main():
    """Summarise NVD CVE records (preferring CVSS v2.0 over v3.0 metrics),
    flag each CVE's MFL membership and known-exploit status, and publish the
    result to the target bucket."""
    # audit the run so key data items get logged on completion
    with Auditor(JOB_NAME, r'../../config/va_auditor.yaml') as auditor:
        # temp file for the output; the auditor tracks records written to it
        temp_file = dt.temp_file(JOB_NAME, auditor)

        # build membership sets for the MFL and exploit-DB CVE lists
        mfl_records = dt.select_file_records(SOURCE_PROJECT, SOURCE_BUCKET,
                                             MFL_LIST_BLOB)
        mfl_index = set(jl.create_index(mfl_records, 'CVE'))
        edb_records = dt.select_file_records(SOURCE_PROJECT, SOURCE_BUCKET,
                                             CVES_WITH_EXPLOITS_BLOB)
        edb_index = set(jl.create_index(edb_records, 'CVE'))

        # CVSS blocks to try, in preference order, with the per-version
        # name of the user-interaction field (the schemas differ)
        cvss_versions = (('v2.0', 'userInteractionRequired'),
                         ('v3.0', 'userInteraction'))
        impact_fields = ('Confidentiality', 'Integrity', 'Availability',
                         'UserInteraction', 'BaseScore')

        # main loop: one summary line per NVD CVE record
        for blob in dt.get_list_of_blobs(SOURCE_PROJECT, SOURCE_BUCKET,
                                         NVD_CVE_SUMMARY_BLOB):
            for raw_line in dt.read_blob_lines(SOURCE_PROJECT, SOURCE_BUCKET,
                                               blob.name):
                record = json.loads(raw_line)
                result = {'CVE': record.get('CVE')}
                # take the first non-empty CVSS block; blank out all
                # metric fields when neither version has data
                for version_key, ui_field in cvss_versions:
                    metrics = record[version_key]
                    if metrics != {}:
                        result['Confidentiality'] = metrics.get(
                            'confidentialityImpact')
                        result['Integrity'] = metrics.get('integrityImpact')
                        result['Availability'] = metrics.get(
                            'availabilityImpact')
                        result['UserInteraction'] = metrics.get(ui_field)
                        result['BaseScore'] = metrics.get('baseScore')
                        break
                else:
                    for field in impact_fields:
                        result[field] = ''
                # could have also implemented by adding an MFL=True
                # column to the MFL set and joined on CVE
                result = jl.set_value(result, 'MFL',
                                      lambda x: x.get('CVE') in mfl_index)
                result = jl.set_value(result, 'Exploit_Known',
                                      lambda x: x.get('CVE') in edb_index)
                temp_file.write_json_line(result)

        # save the temp file to the bucket
        temp_file.save_to_bucket(TARGET_PROJECT, TARGET_BUCKET, TARGET_BLOB)
if IP.count(".") == 3 and all(isIPv4_part(i) for i in IP.split(".")): return True if IP.count(":") == 7 and all(isIPv6_part(i) for i in IP.split(":")): return True return False cmdb_graph = nx.DiGraph() errorbin = dt.temp_file(JOB_NAME + '-errorbin') counter = 0 print('adding relationships') for blob in dt.get_list_of_blobs(SOURCE_PROJECT, SOURCE_BUCKET, RELATIONSHIP_BLOB): for line in dt.read_blob_lines(SOURCE_PROJECT, SOURCE_BUCKET, blob.name): try: record = json.loads(line) counter = counter + 1 # pre-defined edges - most aren't needed but we can't tell at this point parent_sys_id = extract_sysid(record.get('parent_link')) child_sys_id = extract_sysid(record.get('child_link')) # predefined relationships cover both directions, so # split the type and build the reciprocal relationship relationship = record.get('type_display_value', 'unknown by::unknown to').split('::') cmdb_graph.add_edge(child_sys_id, parent_sys_id, relationship=relationship[0])