def calculateID(self, file_name_fullpath):
    # Get the creation date for the first PersistenceItem in the audit (they will all be the same)
    instanceID = None
    tmp_instanceID = None
    try:
        file_object = loadFile(file_name_fullpath)
        root = ET.parse(file_object).getroot()
        file_object.close()
        reg_key = root.find('AppCompatItemExtended')
        reg_modified = reg_key.get('created')
        try:
            tmp_instanceID = datetime.strptime(reg_modified, "%Y-%m-%dT%H:%M:%SZ")
        except ValueError:
            tmp_instanceID = datetime.max
            logger.warning("Invalid reg_modified date found!: %s (%s)" % (reg_modified, file_name_fullpath))
        instanceID = tmp_instanceID
    except Exception:
        traceback.print_exc(file=sys.stdout)

    # If we found no PersistenceItem date we go with plan B (but most probably this is corrupt and will fail later)
    if instanceID is None:
        file_object = loadFile(file_name_fullpath)
        content = file_object.read()
        instanceID = hashlib.md5(content).hexdigest()
        file_object.close()
    return instanceID
def calculateID(self, file_name_fullpath):
    # Use the most recent RegistryItem Modified date in the audit as the instance ID
    instanceID = None
    tmp_instanceID = None
    try:
        file_object = loadFile(file_name_fullpath)
        root = ET.parse(file_object).getroot()
        file_object.close()
        for reg_key in root.findall('RegistryItem'):
            tmp_reg_key = reg_key.find('Modified')
            if tmp_reg_key is not None:
                reg_modified = tmp_reg_key.text
                try:
                    tmp_instanceID = datetime.strptime(reg_modified, "%Y-%m-%dT%H:%M:%SZ")
                except ValueError:
                    tmp_instanceID = datetime.max
                    logger.warning("Invalid reg_modified date found!: %s (%s)" % (reg_modified, file_name_fullpath))
                if instanceID is None or instanceID < tmp_instanceID:
                    instanceID = tmp_instanceID
            else:
                logger.warning("Found RegistryItem with no Modified date (Mir bug?): %s" % file_name_fullpath)
    except Exception:
        logger.exception("Error on calculateID for: %s" % file_name_fullpath)

    # If we found no Modified date in any of the RegistryItems we go with plan B
    # (but most probably ShimCacheParser will fail to parse anyway)
    if instanceID is None:
        file_object = loadFile(file_name_fullpath)
        content = file_object.read()
        instanceID = hashlib.md5(content).hexdigest()
        file_object.close()
    return instanceID
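# Illustrative sketch (not part of the plugin code above): both calculateID
# variants reduce to "keep the newest Modified timestamp, else fall back to
# an MD5 of the raw bytes". This standalone, stdlib-only version assumes the
# same tag names and UTC timestamp format used above; everything else is
# made up for demonstration.
import hashlib
import xml.etree.ElementTree as ET
from datetime import datetime

def newest_modified_or_md5(xml_bytes):
    newest = None
    try:
        root = ET.fromstring(xml_bytes)
        for item in root.findall('RegistryItem'):
            node = item.find('Modified')
            if node is None or node.text is None:
                continue
            ts = datetime.strptime(node.text, "%Y-%m-%dT%H:%M:%SZ")
            if newest is None or ts > newest:
                newest = ts
    except ET.ParseError:
        pass
    # Plan B: hash the whole document when no timestamp could be recovered
    return newest if newest is not None else hashlib.md5(xml_bytes).hexdigest()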
def processArchives(filename, file_filter):
    # Process zip file if required and return a list of files to process
    files_to_process = []
    if filename.endswith('.zip'):
        try:
            zip_archive_filename = filename
            # Open the zip archive:
            zip_archive = zipfile.ZipFile(loadFile(zip_archive_filename), "r")
            zipFileList = zip_archive.namelist()
            countTotalFiles = len(zipFileList)
            logger.info("Total files in %s: %d" % (zip_archive_filename, countTotalFiles))
            logger.info("Hold on while we check the zipped files...")
            # Check if it's an HX audit zip file:
            if 'manifest.json' in zipFileList:
                jsondata = loadFile(os.path.join(zip_archive_filename, 'manifest.json'))
                audit_result_filenames = parseManifestAuditFileName(jsondata, zip_archive_filename)
                for (file_name_fullpath, file_name_original) in audit_result_filenames:
                    logger.debug("Adding file to process %s from manifest.json %s" % (file_name_fullpath, zip_archive_filename))
                    files_to_process.append((file_name_fullpath, file_name_original))
            else:
                # Process normal zip file:
                for zipped_filename in zipFileList:
                    if re.match(file_filter, '\\' + zipped_filename):
                        if zipped_filename.endswith('.zip'):
                            # Recurse into nested zip archives:
                            files_to_process.extend(processArchives(os.path.join(zip_archive_filename, zipped_filename), file_filter))
                        else:
                            logger.debug("Adding file to process %s from zip archive %s" % (os.path.join(zip_archive_filename, zipped_filename), zip_archive_filename))
                            files_to_process.append((os.path.join(zip_archive_filename, zipped_filename), None))
                    else:
                        logger.debug("Ignoring file: %s" % os.path.join(zip_archive_filename, zipped_filename))
            # if len(files_to_process) == 0:
            #     logger.error("No valid files found!")
            zip_archive.close()
        except (IOError, zipfile.BadZipfile, struct.error), err:
            logger.error("Error reading zip archive: %s" % zip_archive_filename)
            exit(-1)
    return files_to_process
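# Sketch of the nested-archive recursion idea in processArchives, reduced to
# stdlib calls only. The real code above routes members through loadFile and
# joins virtual paths with os.path.join; here nested zips are simply read
# into memory and reopened, which is an assumption, not the plugin's method.
import io
import zipfile

def walk_zip(source, prefix=''):
    # Yield member names, descending into nested .zip members recursively
    with zipfile.ZipFile(source, "r") as archive:
        for name in archive.namelist():
            if name.lower().endswith('.zip'):
                nested = io.BytesIO(archive.read(name))
                for inner in walk_zip(nested, prefix + name + '/'):
                    yield inner
            else:
                yield prefix + name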
def calculateID(self, file_name_fullpath):
    # Get the creation date for the first PersistenceItem in the audit (they will all be the same)
    instanceID = None
    tmp_instanceID = None
    # Parsing these huge xml files to grab the last modified date as an instance ID
    # is slower than just hashing the whole thing:
    # try:
    #     file_object = loadFile(file_name_fullpath)
    #     root = ET.parse(file_object).getroot()
    #     file_object.close()
    #     reg_key = root.find('AmCacheItem')
    #     reg_modified = reg_key.get('created')
    #     try:
    #         tmp_instanceID = datetime.strptime(reg_modified, "%Y-%m-%dT%H:%M:%SZ")
    #     except ValueError as e:
    #         tmp_instanceID = datetime.max
    #         logger.warning("Invalid reg_modified date found!: %s (%s)" % (reg_modified, file_name_fullpath))
    #     instanceID = tmp_instanceID
    # except Exception:
    #     traceback.print_exc(file=sys.stdout)

    # If we found no AmCacheItem date we go with plan B and just hash the whole file
    if instanceID is None:
        file_object = loadFile(file_name_fullpath)
        content = file_object.read()
        instanceID = hashlib.md5(content).hexdigest()
        file_object.close()
    return instanceID
def calculateID(self, file_name_fullpath):
    instanceID = 0
    file_object = loadFile(file_name_fullpath)
    regf_file = pyregf.file()
    regf_file.open_file_object(file_object, "r")
    # Search for the key containing the ShimCache entries on all control sets
    # Use the last modification time of the most recently written one as instanceID
    root = regf_file.get_root_key()
    num_keys = root.get_number_of_sub_keys()
    for i in xrange(0, num_keys):
        tmp_key = root.get_sub_key(i)
        if "controlset" in tmp_key.get_name().lower():
            session_man_key = regf_file.get_key_by_path(r"%s\Control\Session Manager" % tmp_key.get_name())
            num_sub_keys = session_man_key.get_number_of_sub_keys()
            for j in xrange(0, num_sub_keys):
                tmp_sub_key = session_man_key.get_sub_key(j)
                if "appcompatibility" in tmp_sub_key.get_name().lower() or "appcompatcache" in tmp_sub_key.get_name().lower():
                    last_write_time = tmp_sub_key.get_last_written_time_as_integer()
                    if last_write_time > instanceID:
                        instanceID = last_write_time
                    break

    # Need to close these or the memory will never get freed:
    regf_file.close()
    del regf_file
    file_object.close()
    del file_object
    return instanceID
def getHostName(self, file_name_fullpath):
    file_object = loadFile(file_name_fullpath)
    regf_file = pyregf.file()
    regf_file.open_file_object(file_object, "r")
    # Get control set number
    tmp_key = regf_file.get_key_by_path(r'Select')
    if tmp_key is not None:
        controlset_number = tmp_key.get_value_by_name('Current').get_data_as_integer()
        # Get host name
        tmp_key = regf_file.get_key_by_path(r'ControlSet00' + str(controlset_number) + r'\Control\ComputerName\ComputerName')
        host_name = tmp_key.get_value_by_name('ComputerName').get_data_as_string()
    else:
        # todo: Close everything down elegantly
        logger.error("Attempting to process non-SYSTEM hive with appcompat_raw_hive plugin: %s" % file_name_fullpath)
        raise Exception('Attempting to process non-SYSTEM hive with appcompat_raw_hive plugin')

    # Need to close these or the memory will never get freed:
    regf_file.close()
    del regf_file
    file_object.close()
    del file_object
    return host_name
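# The two pyregf routines above share the classic SYSTEM-hive lookup: read
# the volatile 'Select' key to find the active control set, then address
# keys under ControlSet00N. A small helper capturing just that step (only
# pyregf calls already used above; 'regf_file' is an open pyregf.file):
def current_controlset_path(regf_file):
    select_key = regf_file.get_key_by_path(r'Select')
    if select_key is None:
        return None  # Not a SYSTEM hive
    current = select_key.get_value_by_name('Current').get_data_as_integer()
    return r'ControlSet00' + str(current)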
def processFile(self, file_fullpath, hostID, instanceID, rowsData):
    rowNumber = 0
    file_object = loadFile(file_fullpath)
    rows = file_object.read().splitlines()[1:]
    file_object.close()
    appCompatREGEX = re.compile(
        r"((?:\d{4}-\d\d-\d\d \d\d:\d\d:\d\d)|N/A)[, ]((?:\d{4}-\d\d-\d\d \d\d:\d\d:\d\d)|N/A)[, ](.*)\\([^\\]*)[, ](N/A|\d*)[, ](N/A|True|False)")
    assert rows is not None
    for r in rows:
        if b'\x00' in r:
            logger.debug("NULL byte found, ignoring bad shimcache parse: %s" % r)
            continue
        m = appCompatREGEX.match(r)
        if m:
            namedrow = settings.EntriesFields(
                HostID=hostID,
                EntryType=settings.__APPCOMPAT__,
                RowNumber=rowNumber,
                LastModified=unicode(m.group(1)),
                LastUpdate=unicode(m.group(2)),
                FilePath=unicode(m.group(3)),
                FileName=unicode(m.group(4)),
                Size=unicode(m.group(5)),
                ExecFlag=str(m.group(6)),
                InstanceID=instanceID)
            rowsData.append(namedrow)
            rowNumber += 1
        else:
            logger.warning("Entry regex failed for: %s - %s" % (hostID, r))
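# Standalone check that the regex above matches the ShimCacheParser line
# format it expects; the sample row is fabricated for illustration:
import re

APPCOMPAT_RE = re.compile(
    r"((?:\d{4}-\d\d-\d\d \d\d:\d\d:\d\d)|N/A)[, ]"
    r"((?:\d{4}-\d\d-\d\d \d\d:\d\d:\d\d)|N/A)[, ]"
    r"(.*)\\([^\\]*)[, ](N/A|\d*)[, ](N/A|True|False)")

sample = r"2016-01-02 03:04:05,N/A,C:\Windows\System32\calc.exe,N/A,True"
match = APPCOMPAT_RE.match(sample)
assert match is not None
assert match.group(3) == r"C:\Windows\System32" and match.group(4) == "calc.exe"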
def processFile(self, file_fullpath, hostID, instanceID, rowsData):
    rowNumber = 0
    file_object = loadFile(file_fullpath)
    csvdata = file_object.read().splitlines()[1:]
    file_object.close()

    data = csv.reader(csvdata, dialect='IngestDialect1')
    for row in data:
        # Reset the validity flag per row so one bad row doesn't taint the rest
        rowValid = True
        for field in row:
            if b'\x00' in field:
                settings.logger.warning("NULL byte found, ignoring bad shimcache parse: %s" % field)
                rowValid = False
        if rowValid:
            path, filename = ntpath.split(row[2])
            namedrow = settings.EntriesFields(
                HostID=hostID,
                EntryType=settings.__APPCOMPAT__,
                RowNumber=rowNumber,
                LastModified=unicode(row[0]),
                LastUpdate=unicode(row[1]),
                FilePath=unicode(path),
                FileName=unicode(filename),
                Size=unicode(row[3]),
                ExecFlag=str(row[4]),
                InstanceID=instanceID)
            rowsData.append(namedrow)
            rowNumber += 1
def processFile(self, file_fullpath, hostID, instanceID, rowsData):
    rowNumber = 0
    file_object = loadFile(file_fullpath)
    rows = _processAmCacheFile_StringIO(file_object)
    file_object.close()
    for r in rows:
        namedrow = settings.EntriesFields(
            HostID=hostID,
            EntryType=settings.__AMCACHE__,
            RowNumber=rowNumber,
            FilePath=(None if r.path is None else ntpath.dirname(r.path)),
            FileName=(None if r.path is None else ntpath.basename(r.path)),
            Size=r.size,
            ExecFlag='True',
            # AmCache SHA1 values carry a four-character prefix; strip it:
            SHA1=(None if r.sha1 is None else r.sha1[4:]),
            FileDescription=r.file_description,
            FirstRun=r.first_run,
            Created=r.created_timestamp,
            Modified1=r.modified_timestamp,
            Modified2=r.modified_timestamp2,
            LinkerTS=r.linker_timestamp,
            Product=r.product,
            Company=r.company,
            PE_sizeofimage=r.pe_sizeofimage,
            Version_number=r.version_number,
            Version=r.version,
            Language=r.language,
            Header_hash=r.header_hash,
            PE_checksum=r.pe_checksum,
            SwitchBackContext=r.switchbackcontext,
            InstanceID=instanceID)
        rowsData.append(namedrow)
        rowNumber += 1
def id_filename(self, file_name_fullpath):
    # Emulate file magic functionality without external deps
    # Limited to:
    #  'MS Windows registry file, NT/2000 or above'
    #  'XML 1.0 document, ASCII text'
    magic_id = None
    file_chunk = loadFile(file_name_fullpath, 200)
    if 'regf' in file_chunk.getvalue():
        magic_id = 'MS Windows registry file, NT/2000 or above'
    elif '?xml' in file_chunk.getvalue():
        magic_id = 'XML 1.0 document, ASCII text'
        # Perform deeper check to distinguish the Mir XML subtype
        if 'IssueList' in file_chunk.getvalue():
            magic_id += '+ Mir IssueList file'
        elif 'batchresult' in file_chunk.getvalue():
            magic_id += '+ Mir batchresult file'
        elif 'itemList' in file_chunk.getvalue():
            magic_id += '+ Mir itemList file'
    elif 'Last Modified,Last Update' in file_chunk.getvalue():
        magic_id = 'ShimCacheParser CSV'
    else:
        logger.warning("Unknown magic in file: %s [%s]" % (file_name_fullpath, toHex(file_chunk.getvalue())))
    file_chunk.close()
    return magic_id
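# The same byte signatures id_filename keys on, as a standalone function over
# a raw header chunk (return strings copied from above, structure assumed for
# illustration):
def sniff_magic(first_bytes):
    if b'regf' in first_bytes:
        return 'MS Windows registry file, NT/2000 or above'
    if b'?xml' in first_bytes:
        return 'XML 1.0 document, ASCII text'
    if b'Last Modified,Last Update' in first_bytes:
        return 'ShimCacheParser CSV'
    return None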
def calculateID(self, file_name_fullpath):
    # Lazy instanceID calculation; override to make it faster where the ingest format allows it
    content_file = loadFile(file_name_fullpath)
    content = content_file.read()
    content_file.close()
    instanceID = hashlib.md5(content).hexdigest()
    return instanceID
def calculateID(self, file_name_fullpath):
    # We don't have a useful TS here so we hash the file to calculate an ID
    file_object = loadFile(file_name_fullpath)
    content = file_object.read()
    instanceID = hashlib.md5(content).hexdigest()
    file_object.close()
    return instanceID
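# The MD5 fallback used by the calculateID variants above, as a standalone
# helper. Reading in chunks (an addition for illustration, not how the code
# above does it) keeps memory flat on large evidence files:
import hashlib

def md5_instance_id(path, chunk_size=1024 * 1024):
    digest = hashlib.md5()
    with open(path, 'rb') as handle:
        for chunk in iter(lambda: handle.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()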
def checkMagic(self, file_name_fullpath):
    # Check magic
    magic_id = self.id_filename(file_name_fullpath)
    if magic_id is not None and 'ShimCacheParser CSV' in magic_id:
        file_object = loadFile(file_name_fullpath)
        header = file_object.readline().strip()
        file_object.close()
        if header == "Last Modified,Last Update,Path,File Size,Exec Flag":
            return True
    return False
def processFile(self, file_fullpath, hostID, instanceID, rowsData):
    rowNumber = 0
    minSQLiteDTS = datetime(1, 1, 1, 0, 0, 0)
    maxSQLiteDTS = datetime(9999, 12, 31, 0, 0, 0)
    file_object = loadFile(file_fullpath)
    csvdata = file_object.read().splitlines()[1:]
    file_object.close()

    data = csv.reader(csvdata, dialect='IngestDialect1')
    for row in data:
        # Reset the validity flag per row so one bad row doesn't taint the rest
        rowValid = True
        for field in row:
            if b'\x00' in field:
                settings.logger.warning("NULL byte found, ignoring bad shimcache parse: %s" % field)
                rowValid = False
        try:
            # Convert to timestamps, mapping 'N/A' to the minimum SQLite-friendly sentinel:
            if row[0] != 'N/A':
                tmp_LastModified = datetime.strptime(row[0], "%Y-%m-%d %H:%M:%S")
            else:
                tmp_LastModified = minSQLiteDTS
            if row[1] != 'N/A':
                tmp_LastUpdate = datetime.strptime(row[1], "%Y-%m-%d %H:%M:%S")
            else:
                tmp_LastUpdate = minSQLiteDTS
        except Exception as e:
            # Skip rows whose timestamps can't be parsed
            rowValid = False
            exc_type, exc_obj, exc_tb = sys.exc_info()
            fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
            logger.info("Exception processing row (%s): %s [%s / %s / %s]" % (
                e.message, unicode(ntpath.split(row[2])[0]), exc_type, fname, exc_tb.tb_lineno))
        if rowValid:
            path, filename = ntpath.split(row[2])
            namedrow = settings.EntriesFields(
                HostID=hostID,
                EntryType=settings.__APPCOMPAT__,
                RowNumber=rowNumber,
                LastModified=tmp_LastModified,
                LastUpdate=tmp_LastUpdate,
                FilePath=unicode(path),
                FileName=unicode(filename),
                Size=unicode(row[3]),
                ExecFlag=str(row[4]),
                InstanceID=instanceID)
            rowsData.append(namedrow)
            rowNumber += 1
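# The 'N/A' handling above maps missing timestamps to a minimal sentinel so
# they sort first and stay inside SQLite's representable datetime range. The
# same normalization as a tiny standalone helper (format string taken from
# the code above):
from datetime import datetime

MIN_SQLITE_DTS = datetime(1, 1, 1, 0, 0, 0)

def parse_shimcache_ts(value):
    if value == 'N/A':
        return MIN_SQLITE_DTS
    return datetime.strptime(value, "%Y-%m-%d %H:%M:%S")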
def checkMagic(self, file_name_fullpath):
    # As long as we find one AppCompatCache key we're declaring it good for us
    file_object = loadFile(file_name_fullpath)
    root = ET.parse(file_object).getroot()
    for reg_key in root.findall('RegistryItem'):
        if reg_key.find('ValueName').text == "AppCompatCache":
            file_object.close()
            return True
    file_object.close()
    return False
def checkMagic(self, file_name_fullpath):
    # As long as we find one AmCacheItem PersistenceType we're declaring it good for us
    # todo: Avoid parsing the full file for this and just grep its head
    file_object = loadFile(file_name_fullpath)
    try:
        root = etree.parse(file_object).getroot()
        if root.find('AmCacheItem') is not None:
            return True
    except Exception:
        logger.warning("[%s] Failed to parse XML for: %s" % (self.ingest_type, file_name_fullpath))
    finally:
        file_object.close()
    return False
def checkMagic(self, file_name_fullpath):
    # As long as we find one AmCacheItem PersistenceType we're declaring it good for us
    # Check magic
    magic_id = self.id_filename(file_name_fullpath)
    if magic_id is not None and 'XML' in magic_id:
        file_object = loadFile(file_name_fullpath)
        try:
            root = etree.parse(file_object).getroot()
            if root.find('AmCacheItem') is not None:
                return True
        except Exception:
            logger.warning("[%s] Failed to parse XML for: %s" % (self.ingest_type, file_name_fullpath))
        finally:
            file_object.close()
    return False
def checkMagic(self, file_name_fullpath):
    # As long as we find one Appcompat PersistenceType we're declaring it good for us
    file_object = loadFile(file_name_fullpath)
    try:
        root = etree.parse(file_object).getroot()
        for reg_key in root.findall('AppCompatItemExtended'):
            if reg_key.find('PersistenceType').text.lower() == "appcompat":
                return True
    except Exception:
        logger.warning("[%s] Failed to parse XML for: %s" % (self.ingest_type, file_name_fullpath))
        # traceback.print_exc(file=sys.stdout)
    finally:
        file_object.close()
    return False
def checkMagic(self, file_name_fullpath):
    # As long as we find one AppCompatCache key we're declaring it good for us
    file_object = loadFile(file_name_fullpath)
    try:
        root = ET.parse(file_object).getroot()
        for reg_key in root.findall('RegistryItem'):
            if reg_key.find('ValueName').text == "AppCompatCache":
                return True
    except Exception:
        logger.warning("[%s] Failed to parse XML for: %s" % (self.ingest_type, file_name_fullpath))
        # traceback.print_exc(file=sys.stdout)
    finally:
        file_object.close()
    return False
def processFile(self, file_fullpath, hostID, instanceID, rowsData):
    rowNumber = 0
    check_tags = ['LastModified', 'AppCompatPath', 'ExecutionFlag']
    try:
        xml_data = loadFile(file_fullpath)
        for event, element in etree.iterparse(xml_data, events=("end",)):
            skip_entry = False
            tag_dict = {}
            if element.tag == "AppCompatItemExtended":
                self._processElement(element, tag_dict)

                # Check we have everything we need and ignore entries with critical XML errors on them
                for tag in check_tags:
                    if tag not in tag_dict or tag_dict[tag] is None:
                        if 'AppCompatPath' in tag_dict:
                            logger.warning("Malformed tag [%s: %s] in %s, entry: %s (skipping entry)" % (tag, tag_dict.get(tag), tag_dict['AppCompatPath'], file_fullpath))
                        else:
                            logger.warning("Malformed tag [%s: %s] in %s, entry: Unknown (skipping entry)" % (tag, tag_dict.get(tag), file_fullpath))
                        skip_entry = True
                        break

                # If the entry is valid do some housekeeping:
                if not skip_entry:
                    if tag_dict['ExecutionFlag'] == '1':
                        tmpExecFlag = True
                    elif tag_dict['ExecutionFlag'] == '0':
                        tmpExecFlag = False
                    else:
                        tmpExecFlag = tag_dict['ExecutionFlag']
                    namedrow = settings.EntriesFields(
                        HostID=hostID,
                        EntryType=settings.__APPCOMPAT__,
                        RowNumber=rowNumber,
                        InstanceID=instanceID,
                        LastModified=(tag_dict['LastModified'].replace("T", " ").replace("Z", "") if 'LastModified' in tag_dict else '0001-01-01 00:00:00'),
                        LastUpdate=(tag_dict['LastUpdate'].replace("T", " ").replace("Z", "") if 'LastUpdate' in tag_dict else '0001-01-01 00:00:00'),
                        FileName=ntpath.basename(tag_dict['AppCompatPath']),
                        FilePath=ntpath.dirname(tag_dict['AppCompatPath']),
                        Size=(tag_dict['Size'] if 'Size' in tag_dict else 'N/A'),
                        ExecFlag=tmpExecFlag)
                    rowsData.append(namedrow)
                    rowNumber += 1
            # Clear each element once consumed to keep memory usage flat on huge audits
            element.clear()
        xml_data.close()
    except Exception:
        logger.exception("Error processing: %s" % file_fullpath)
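# Why iterparse plus element.clear(): Mir audit files can be huge, and
# clearing each element once consumed keeps memory usage flat instead of
# building the whole tree in RAM. A minimal stdlib sketch of that streaming
# pattern (tag name from the code above, the rest illustrative):
import xml.etree.ElementTree as ET

def stream_items(xml_file, tag='AppCompatItemExtended'):
    for event, element in ET.iterparse(xml_file, events=("end",)):
        if element.tag == tag:
            yield dict((child.tag, child.text) for child in element)
        element.clear()  # Release nodes we no longer need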
def checkMagic(self, file_name_fullpath):
    # As long as we find one ShimCacheItem entry we're declaring it good for us
    # Check magic
    magic_id = self.id_filename(file_name_fullpath)
    if magic_id is not None and 'XML' in magic_id:
        file_object = loadFile(file_name_fullpath)
        try:
            root = etree.parse(file_object).getroot()
            if root.find('ShimCacheItem') is not None:
                return True
        except Exception:
            logger.exception("[%s] Failed to parse XML for: %s" % (self.ingest_type, file_name_fullpath))
        finally:
            file_object.close()
    return False
def processFile(self, file_fullpath, hostID, instanceID, rowsData):
    # Returns data in rowsData
    rowNumber = 0
    # Process file using ShimCacheParser
    try:
        xml_data = loadFile(file_fullpath)
        (error, entries) = read_mir(xml_data, True)
        xml_data.close()
        assert not error
        if not entries:
            logger.warning("[ShimCacheParser] found no entries for %s" % file_fullpath)
            return False
        else:
            rows = write_it(entries, "StringIO")[1:]
    except IOError, err:
        logger.error("[ShimCacheParser] Error opening binary file: %s" % str(err))
def calculateID(self, file_name_fullpath):
    instanceID = None
    file_object = loadFile(file_name_fullpath)
    regf_file = pyregf.file()
    regf_file.open_file_object(file_object, "r")

    # An AmCache hive must have a Root\File key:
    if regf_file.get_key_by_path(r'Root\File') is None:
        logger.warning("Not an AmCache hive! [%s]" % file_name_fullpath)
    else:
        instanceID = regf_file.root_key.last_written_time

    # Need to close these or the memory will never get freed:
    regf_file.close()
    del regf_file
    file_object.close()
    del file_object
    return instanceID
def checkMagic(self, file_name_fullpath):
    # Check magic
    magic_id = self.id_filename(file_name_fullpath)
    if magic_id is not None and 'XML' in magic_id:
        # todo: Issue documents are declared as UTF-8 but not encoded as such, etree may fail to parse them
        # root = etree.parse(file_object).getroot()
        # if root.tag == 'IssueList': return True
        if 'IssueList' in magic_id or 'batchresult' in magic_id:
            return True
    return False
def checkMagic(self, file_name_fullpath):
    # As long as we find one AppCompatCache key we're declaring it good for us
    # Check magic
    magic_id = self.id_filename(file_name_fullpath)
    if magic_id is not None and 'XML' in magic_id:
        file_object = loadFile(file_name_fullpath)
        try:
            root = ET.parse(file_object).getroot()
            # todo: replace findall with find
            for reg_key in root.findall('RegistryItem'):
                if reg_key.find('ValueName').text == "AppCompatCache":
                    return True
        except Exception:
            logger.warning("[%s] Failed to parse XML for: %s" % (self.ingest_type, file_name_fullpath))
        finally:
            file_object.close()
    return False
def checkMagic(self, file_name_fullpath):
    # As long as we find one Appcompat PersistenceType we're declaring it good for us
    # Check magic
    magic_id = self.id_filename(file_name_fullpath)
    if magic_id is not None and 'XML' in magic_id:
        file_object = loadFile(file_name_fullpath)
        try:
            root = etree.parse(file_object).getroot()
            # todo: replace findall with find
            for reg_key in root.findall('PersistenceItem'):
                if reg_key.find('PersistenceType').text.lower() == "appcompat":
                    return True
        except Exception:
            logger.warning("[%s] Failed to parse XML for: %s" % (self.ingest_type, file_name_fullpath))
        finally:
            file_object.close()
    return False
def processFile(self, file_fullpath, hostID, instanceID, rowsData):
    rowNumber = 0
    entries = None
    minSQLiteDTS = datetime(1, 1, 1, 0, 0, 0)
    maxSQLiteDTS = datetime(9999, 12, 31, 0, 0, 0)

    # Process file using ShimCacheParser
    try:
        entries = read_from_hive(loadFile(file_fullpath), True)
        if not entries:
            logger.warning("[ShimCacheParser] found no entries for %s" % file_fullpath)
            return False
        else:
            rows = write_it(entries, "StringIO")[1:]
    except IOError, err:
        logger.error("[ShimCacheParser] Error opening binary file: %s" % str(err))
def checkMagic(self, file_name_fullpath):
    magic_ok = False
    # Quick and dirty check
    file_object = loadFile(file_name_fullpath)
    tmp = struct.unpack('4s', file_object.read(4))
    if tmp[0] == "regf":
        # Perform a deeper check using pyregf
        regf_file = pyregf.file()
        regf_file.open_file_object(file_object, "r")
        magic_key = regf_file.get_key_by_path(r'Root\File')
        regf_file.close()
        del regf_file
        if magic_key is not None:
            magic_ok = True

    file_object.close()
    del file_object
    return magic_ok
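# The two-stage check above, reduced to its cheap first stage: NT registry
# hives start with the 4-byte signature 'regf', so a quick header test can
# rule files out before paying for a full pyregf open. Standalone,
# stdlib-only sketch:
import struct

def looks_like_regf(path):
    with open(path, 'rb') as handle:
        header = handle.read(4)
    return len(header) == 4 and struct.unpack('4s', header)[0] == b'regf'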
def checkMagic(self, file_name_fullpath):
    magic_ok = False
    # Check magic
    magic_id = self.id_filename(file_name_fullpath)
    if magic_id is not None and 'registry' in magic_id:
        file_object = loadFile(file_name_fullpath)
        # Perform a deeper check using pyregf
        regf_file = pyregf.file()
        regf_file.open_file_object(file_object, "r")
        magic_key = regf_file.get_key_by_path(r'Root\File')
        regf_file.close()
        del regf_file
        if magic_key is not None:
            magic_ok = True
        file_object.close()
        del file_object
    return magic_ok
def checkMagic(self, file_name_fullpath):
    magic_ok = False
    # Check magic
    magic_id = self.id_filename(file_name_fullpath)
    if magic_id is not None and 'registry' in magic_id:
        file_object = loadFile(file_name_fullpath)
        # Perform a deeper check using pyregf
        regf_file = pyregf.file()
        regf_file.open_file_object(file_object, "r")
        magic_key = regf_file.get_key_by_path(r'Select')
        regf_file.close()
        del regf_file
        if magic_key is not None:
            magic_ok = True
        # Need to close these or the memory will never get freed:
        file_object.close()
        del file_object
    return magic_ok