def main():
    """Read a RegXML file named on the command line, collect timeline records
    via the module-level `process` callback, and print them sorted, one
    tab-delimited record per line.

    Exits with status 1 (after printing usage) when no input file is given.
    """
    if len(sys.argv) < 2:
        print("Usage: {} <input.regxml>".format(sys.argv[0]))
        exit(1)
    # BUG FIX: the original passed open(...) inline and leaked the file
    # handle; a with-block guarantees it is closed even if parsing raises.
    with open(sys.argv[1], "rb") as xmlfile:
        dfxml.read_regxml(xmlfile=xmlfile, callback=process)
    # `timeline` is a module-level list populated by `process` as a side
    # effect of read_regxml above.
    timeline.sort()
    for record in timeline:
        print("\t".join(map(str, record)))
def main():
    """Entry point: parse the RegXML file given as argv[1] and emit the
    accumulated timeline as sorted, tab-separated lines on stdout.

    Prints a usage message and exits with status 1 if argv[1] is missing.
    """
    if len(sys.argv) < 2:
        print("Usage: {} <input.regxml>".format(sys.argv[0]))
        exit(1)
    # BUG FIX: close the input file deterministically (the original handed
    # an anonymous open(...) to read_regxml and never closed it).
    with open(sys.argv[1], "rb") as xmlfile:
        dfxml.read_regxml(xmlfile=xmlfile, callback=process)
    # The `process` callback fills the module-level `timeline` list.
    timeline.sort()
    for record in timeline:
        print("\t".join(map(str, record)))
def process(self, fname):
    """Record *fname* as the file currently being processed and, if it is a
    RegXML file, parse it cell-by-cell through self.process_cell.
    """
    self.current_fname = fname
    if fname.endswith(".regxml"):
        # BUG FIX: the original opened `infile`, an undefined name, which
        # raised NameError at runtime; the intended argument is `fname`
        # (compare the sibling variant of this method in this file).
        # Also use a with-block so the file handle is closed.
        with open(fname, 'rb') as xmlfile:
            reader = dfxml.read_regxml(xmlfile=xmlfile, callback=self.process_cell)
def process(self, fname):
    """Remember *fname* as the current input; when it names a RegXML file,
    stream its cells through self.process_cell via dfxml.read_regxml.
    """
    self.current_fname = fname
    # Guard clause: anything that is not RegXML is only recorded, not parsed.
    if not fname.endswith(".regxml"):
        return
    reader = dfxml.read_regxml(xmlfile=open(fname, 'rb'), callback=self.process_cell)
def main():
    """Convert RegXML files from disk sequences to a single SQLite database
    of Registry cells.

    Reads three manifests named on the command line (successful-RegXML list,
    hive metadata list, and optionally a drive-sequence listing), builds an
    ordered work list, then populates the image_anno, hive_analysis,
    hives_failed and cell_analysis tables in a freshly created database.
    Exits with status 1 if the output database already exists.
    """
    parser = argparse.ArgumentParser(
        prog="rx_make_database.py",
        description="Convert RegXML files from disk sequences to a single SQLite database of Registry cells.")
    parser.add_argument(
        "successful_regxml_list",
        action="store",
        help="The regxml list should only have regxml files from successfully completed producing processes (such as hivexml checked with xmllint). Files should be given as absolute paths.")
    parser.add_argument(
        "hive_meta_list",
        action="store",
        help="The hive meta list should have absolute paths to RegXML files, with each line containing a hive file absolute path, the hive's full in-image path as given in DFXML, and its maccr times (in that order).")
    # FIX: corrected user-facing typo "Outut" -> "Output".
    parser.add_argument("output_database_file", action="store", help="Output database must not exist.")
    parser.add_argument(
        "--drive_sequence_listing",
        required=False,
        action="store",
        help="The drive sequence listing should have one line per drive image, and the following line being either the next image taken of that drive, or a blank line to indicate the drive's timeline is complete. A sequence line should have two tab-delimited fields, first the image name, second the name of the image sequence.")
    parser.add_argument("--verbose", action="store_true", help="Enable verbose output.")
    args = parser.parse_args()

    # Refuse to clobber an existing database.
    if os.path.exists(args.output_database_file):
        parser.print_help()
        exit(1)

    #Identify disk image sequences
    # Key: image base name.  Value: immediately-preceding image (None for first).
    image_sequence_priors = {}
    # Key: image base name.  Value: line number in the sequence file.
    image_sequence_numbers = {}
    # Key: image base name.  Value: image sequence name.
    image_sequence_names = {}

    working_with_priors = False
    #Populate disk image sequence index if optional parameter is passed
    if args.drive_sequence_listing is not None:
        working_with_priors = True
        image_sequences = [[]]
        # FIX: with-block closes the handle even if a line raises.
        with open(args.drive_sequence_listing, "r") as sequence_file:
            for (line_no, line) in enumerate(sequence_file):
                line_cleaned = line.strip()
                if line_cleaned == "":
                    # A blank line terminates the current drive's sequence.
                    image_sequences.append([])
                else:
                    line_parts = line_cleaned.split("\t")
                    image_sequences[-1].append(line_parts[0])
                    image_sequence_numbers[line_parts[0]] = line_no
                    if len(line_parts) > 1:
                        image_sequence_names[line_parts[0]] = line_parts[1]
        # Map each image to its immediate predecessor within its sequence.
        for image_sequence in image_sequences:
            last_image = None
            for image in image_sequence:
                image_sequence_priors[image] = last_image
                last_image = image

    #Produce a list of the RegXML files that completed
    #List does double-duty as a map from a regxml file to the hive file from which it was derived.
    successful_regxmls = {}
    # FIX: the original never closed this file; use a with-block.
    with open(args.successful_regxml_list, "r") as successful_regxml_file:
        for line in successful_regxml_file:
            cleaned_line = line.strip()
            # BUG FIX: str.split never returns an empty list, so the original
            # `len(...) == 0` blank-line check was unreachable and a blank
            # line fell through to the raise below.  Skip blanks explicitly.
            if cleaned_line == "":
                continue
            cleaned_line_parts = cleaned_line.split("\t")
            if len(cleaned_line_parts) == 2:
                hive_path = cleaned_line_parts[0]
                xml_path = cleaned_line_parts[1]
            else:
                raise Exception(
                    "Unexpected number of line components when reading hive-regxml mapping:\nrepr(line) = " + repr(line))
            successful_regxmls[hive_path] = xml_path
    if args.verbose:
        print("Successful hive file-RegXML pairs:")
        # BUG FIX: str.join requires strings; the original passed tuples and
        # raised TypeError.  Emit one tab-separated pair per line instead.
        print("\n".join("{}\t{}".format(k, successful_regxmls[k]) for k in successful_regxmls))

    #Produce a list of the images to use
    work_list_unordered = []
    with open(args.hive_meta_list, "r") as image_list_file:
        for line in image_list_file:
            cleaned_line = line.strip()
            if cleaned_line != "":
                hive_dump_path, image_file, dfxml_hive_path, hive_mtime, hive_atime, hive_ctime, hive_crtime = cleaned_line.split("\t")
                if hive_dump_path in successful_regxmls:
                    regxml_path = successful_regxmls[hive_dump_path]
                    if working_with_priors:
                        #We want all the input drives to have a prior image or None explicitly specified. So, don't use .get().
                        prior_image = image_sequence_priors[image_file]
                    else:
                        prior_image = None
                    work_list_unordered.append({
                        "regxml_path": regxml_path,
                        "dfxml_hive_path": dfxml_hive_path,
                        "image_file": image_file,
                        "prior_image": prior_image,
                        "mtime": hive_mtime,
                        "atime": hive_atime,
                        "ctime": hive_ctime,
                        "crtime": hive_crtime,
                        "image_sequence_number": image_sequence_numbers.get(image_file)})

    #Order by manifest listing.
    if working_with_priors:
        work_list = sorted(work_list_unordered, key=itemgetter("image_sequence_number"))
    else:
        #Ingest order will do fine in the single-image case.
        work_list = work_list_unordered

    if args.verbose:
        print("In-order work list we are processing:")
        print("\n".join(map(str, work_list)))

    #Begin the SQL database
    conn = sqlite3.connect(args.output_database_file)
    conn.isolation_level = "EXCLUSIVE"
    conn.row_factory = sqlite3.Row
    cursor = conn.cursor()

    #Begin the SQL tables
    cursor.execute(SQL_CREATE_TABLE_IMAGEANNO)
    cursor.execute(SQL_CREATE_TABLE_HIVEANALYSIS)
    cursor.execute(SQL_CREATE_TABLE_HIVES_FAILED)
    cursor.execute(SQL_CREATE_TABLE_CELLANALYSIS)
    cursor.execute(SQL_CREATE_INDEX_CELLANALYSIS_FULLPATH)

    #Populate
    for work_order in work_list:
        current_image_id = None
        #Maybe make a new image record
        cursor.execute("SELECT * FROM image_anno WHERE name = ?", (work_order["image_file"],))
        for row in cursor:
            current_image_id = row["image_id"]
            break
        # FIX: `is None` instead of `== None` (PEP 8 identity comparison).
        if current_image_id is None:
            #Create new record
            image_anno_new_record = {}
            #image name
            image_anno_new_record["name"] = work_order["image_file"]
            #image sequence name
            image_anno_new_record["sequence_name"] = image_sequence_names.get(image_anno_new_record["name"])
            #image sequence prior
            cursor.execute("SELECT image_id FROM image_anno WHERE name = ?", (work_order["prior_image"],))
            for row in cursor:
                image_anno_new_record["sequence_prior_image"] = row["image_id"]
                break
            #Insert
            insert_db(cursor, "image_anno", image_anno_new_record)
            conn.commit()
            #Fetch fresh id
            for row in cursor.execute("SELECT * FROM image_anno WHERE rowid = ?;", (cursor.lastrowid,)):
                current_image_id = row["image_id"]

        #Make a new hive record
        dfxml_hive_path = work_order["dfxml_hive_path"]
        hive_type = hive_type_from_path(dfxml_hive_path, True)
        hive_sequence_name = hive_type_from_path(dfxml_hive_path, False)
        cursor.execute(
            "INSERT INTO hive_analysis(image_file, regxml_path, hive_file_path, hive_type, hive_sequence_name, mtime_file_system, atime_file_system, ctime_file_system, crtime_file_system) VALUES (?,?,?,?,?,?,?,?,?);",
            (work_order["image_file"], work_order["regxml_path"], dfxml_hive_path, hive_type, hive_sequence_name,
             work_order["mtime"], work_order["atime"], work_order["ctime"], work_order["crtime"]))
        conn.commit()

        #Get hive id
        cursor.execute("SELECT * FROM hive_analysis WHERE rowid = ?;", (cursor.lastrowid,))
        current_rec = cursor.fetchone()
        current_hive_id = current_rec["hive_id"]
        if current_hive_id is None:
            raise ValueError("Couldn't get last hive_id, somehow.")

        #Get previous hive in sequence
        previous_hive_id = None
        if working_with_priors:
            #Note we're not using .get() - we want an error raised if we have a broken sequence.
            previous_image_file = image_sequence_priors[work_order["image_file"]]
            for r in cursor.execute(
                    "SELECT hive_id FROM hive_analysis WHERE image_file = ? AND hive_file_path = ?",
                    (previous_image_file, work_order["dfxml_hive_path"])):
                previous_hive_id = r["hive_id"]
            cursor.execute(
                "UPDATE hive_analysis SET previous_hive_in_sequence = ? WHERE hive_id = ?;",
                (previous_hive_id, current_hive_id))
        #Commit updates for hive_analysis
        conn.commit()

        #Process the RegXML into cell records, capturing notes on failure
        reader = None
        try:
            # FIX: close the RegXML file handle deterministically.
            with open(work_order["regxml_path"], "rb") as regxml_file:
                reader = dfxml.read_regxml(
                    xmlfile=regxml_file,
                    callback=lambda co: process_regxml_callback_object(co, current_hive_id, previous_hive_id, cursor))
        # BUG FIX: narrowed from a bare `except:` so KeyboardInterrupt and
        # SystemExit still abort the run instead of being logged as failures.
        except Exception:
            sql_insert_failure = "INSERT INTO hives_failed(hive_id, cells_processed, error_text) VALUES (?, ?, ?);"
            cursor.execute(
                sql_insert_failure,
                (current_hive_id, hive_cell_proc_tallies[current_hive_id], traceback.format_exc()))
        conn.commit()  #Ensure the last updates made it in

        #Update the hive and image records with the necessarily-computed times
        if reader is not None:
            image_updates = {}
            hive_column_value_updates = {}
            hive_column_value_updates["mtime_hive_root"] = str(reader.registry_object.mtime())
            if "mtime_latest_key" in dir(reader.registry_object):
                hive_column_value_updates["mtime_latest_key"] = str(reader.registry_object.mtime_latest_key)
            if "mtime_earliest_key" in dir(reader.registry_object):
                hive_column_value_updates["mtime_earliest_key"] = str(reader.registry_object.mtime_earliest_key)
            if "time_last_clean_shutdown" in dir(reader.registry_object):
                image_updates["last_clean_shutdown_time_hive"] = str(reader.registry_object.time_last_clean_shutdown)
            #Update tables
            update_db(conn, cursor, "hive_analysis", hive_column_value_updates, "hive_id", current_hive_id, True)
            update_db(conn, cursor, "image_anno", image_updates, "image_id", current_image_id, True)
        sys.stderr.write("Note: Just finished with hive %d.\n" % current_hive_id)
    #TODO Also add to the where clause that this should not run on Vista systems. This means digging for that key that notes where the system type is, I know Carvey noted it...

    #Now we have data...but possibly too much.
    cursor.execute("SELECT COUNT(*) FROM cell_analysis WHERE hive_id IN (SELECT hive_id FROM hives_failed);")
    row = cursor.fetchone()
    if row[0] > 0:
        sys.stderr.write("Note: Deleting %d rows from cell_analysis due to processing for hives failing.\n" % row[0])
        cursor.execute("DELETE FROM cell_analysis WHERE hive_id IN (SELECT hive_id FROM hives_failed);")
    #Now it's just right.
    cursor.close()
    conn.close()
def main():
    """Convert RegXML files from disk sequences to a single SQLite database
    of Registry cells.

    Reads three manifests named on the command line (successful-RegXML list,
    hive metadata list, and optionally a drive-sequence listing), builds an
    ordered work list, then populates the image_anno, hive_analysis,
    hives_failed and cell_analysis tables in a freshly created database.
    Exits with status 1 if the output database already exists.
    """
    parser = argparse.ArgumentParser(prog="rx_make_database.py", description="Convert RegXML files from disk sequences to a single SQLite database of Registry cells.")
    parser.add_argument("successful_regxml_list", action="store", help="The regxml list should only have regxml files from successfully completed producing processes (such as hivexml checked with xmllint). Files should be given as absolute paths.")
    parser.add_argument("hive_meta_list", action="store", help="The hive meta list should have absolute paths to RegXML files, with each line containing a hive file absolute path, the hive's full in-image path as given in DFXML, and its maccr times (in that order).")
    parser.add_argument("output_database_file", action="store", help="Outut database must not exist.")
    parser.add_argument("--drive_sequence_listing", required=False, action="store", help="The drive sequence listing should have one line per drive image, and the following line being either the next image taken of that drive, or a blank line to indicate the drive's timeline is complete. A sequence line should have two tab-delimited fields, first the image name, second the name of the image sequence.")
    parser.add_argument("--verbose", action="store_true", help="Enable verbose output.")
    args = parser.parse_args()

    #Refuse to clobber an existing database.
    if os.path.exists(args.output_database_file):
        parser.print_help()
        exit(1)

    #Identify disk image sequences
    """Key: image base name.  Value: immediately-preceding image."""
    image_sequence_priors = {}
    """Key: image base name.  Value: Line number in the sequence file."""
    image_sequence_numbers = {}
    """Key: image base name.  Value: image sequence name."""
    image_sequence_names = {}
    working_with_priors = False
    #Populate disk image sequence index if optional parameter is passed
    if args.drive_sequence_listing is not None:
        working_with_priors = True
        image_sequences = [[]]
        sequence_file = open(args.drive_sequence_listing, "r")
        for (line_no, line) in enumerate(sequence_file):
            line_cleaned = line.strip()
            if line_cleaned == "":
                #A blank line terminates the current drive's sequence.
                image_sequences.append([])
            else:
                line_parts = line_cleaned.split("\t")
                image_sequences[-1].append(line_parts[0])
                image_sequence_numbers[line_parts[0]] = line_no
                if len(line_parts) > 1:
                    image_sequence_names[line_parts[0]] = line_parts[1]
        sequence_file.close()
        #Map each image to its immediate predecessor within its sequence.
        for image_sequence in image_sequences:
            last_image = None
            for image in image_sequence:
                image_sequence_priors[image] = last_image
                last_image = image

    #Produce a list of the RegXML files that completed
    #List does double-duty as a map from a regxml file to the hive file from which it was derived.
    successful_regxmls = {}
    #NOTE(review): this handle is never closed — consider a with-block.
    successful_regxml_file = open(args.successful_regxml_list, "r")
    for line in successful_regxml_file:
        cleaned_line_parts = line.strip().split("\t")
        if len(cleaned_line_parts) == 2:
            hive_path = cleaned_line_parts[0]
            xml_path = cleaned_line_parts[1]
        #NOTE(review): str.split never returns an empty list, so this branch
        #is unreachable — a blank line yields [''] and hits the raise below.
        elif len(cleaned_line_parts) == 0:
            continue
        else:
            raise Exception("Unexpected number of line components when reading hive-regxml mapping:\nrepr(line) = " + repr(line))
        successful_regxmls[hive_path] = xml_path
    if args.verbose:
        print("Successful hive file-RegXML pairs:")
        #NOTE(review): str.join over tuples raises TypeError — each (k, v)
        #pair needs formatting to str before joining.
        print("\n".join([(k,successful_regxmls[k]) for k in successful_regxmls]))

    #Produce a list of the images to use
    work_list_unordered = []
    image_list_file = open(args.hive_meta_list, "r")
    for line in image_list_file:
        cleaned_line = line.strip()
        if cleaned_line != "":
            #Manifest line: hive dump path, image, in-image hive path, then m/a/c/cr times.
            hive_dump_path, image_file, dfxml_hive_path, hive_mtime, hive_atime, hive_ctime, hive_crtime = cleaned_line.split("\t")
            if hive_dump_path in successful_regxmls:
                regxml_path = successful_regxmls[hive_dump_path]
                if working_with_priors:
                    #We want all the input drives to have a prior image or None explicitly specified. So, don't use .get().
                    prior_image = image_sequence_priors[image_file]
                else:
                    prior_image = None
                work_list_unordered.append({"regxml_path":regxml_path, "dfxml_hive_path":dfxml_hive_path, "image_file":image_file, "prior_image":prior_image, "mtime":hive_mtime, "atime":hive_atime, "ctime":hive_ctime, "crtime":hive_crtime, "image_sequence_number":image_sequence_numbers.get(image_file)})
    image_list_file.close()

    #Order by manifest listing.
    if working_with_priors:
        work_list = sorted(work_list_unordered, key=itemgetter("image_sequence_number"))
    else:
        #Ingest order will do fine in the single-image case.
        work_list = work_list_unordered

    if args.verbose:
        print("In-order work list we are processing:")
        print("\n".join(map(str, work_list)))

    #Begin the SQL database
    conn = sqlite3.connect(args.output_database_file)
    conn.isolation_level = "EXCLUSIVE"
    conn.row_factory = sqlite3.Row
    cursor = conn.cursor()

    #Begin the SQL tables
    cursor.execute(SQL_CREATE_TABLE_IMAGEANNO)
    cursor.execute(SQL_CREATE_TABLE_HIVEANALYSIS)
    cursor.execute(SQL_CREATE_TABLE_HIVES_FAILED)
    cursor.execute(SQL_CREATE_TABLE_CELLANALYSIS)
    cursor.execute(SQL_CREATE_INDEX_CELLANALYSIS_FULLPATH)

    #Populate
    for work_order in work_list:
        current_image_id = None
        #Maybe make a new image record
        cursor.execute("SELECT * FROM image_anno WHERE name = ?", (work_order["image_file"],))
        for row in cursor:
            current_image_id = row["image_id"]
            break
        if current_image_id == None:
            #Create new record
            image_anno_new_record = {}
            #image name
            image_anno_new_record["name"] = work_order["image_file"]
            #image sequence name
            image_anno_new_record["sequence_name"] = image_sequence_names.get(image_anno_new_record["name"])
            #image sequence prior
            cursor.execute("SELECT image_id FROM image_anno WHERE name = ?", (work_order["prior_image"],))
            for row in cursor:
                image_anno_new_record["sequence_prior_image"] = row["image_id"]
                break
            #Insert
            insert_db(cursor, "image_anno", image_anno_new_record)
            conn.commit()
            #Fetch fresh id
            for row in cursor.execute("SELECT * FROM image_anno WHERE rowid = ?;", (cursor.lastrowid,)):
                current_image_id = row["image_id"]

        #Make a new hive record
        dfxml_hive_path = work_order["dfxml_hive_path"]
        hive_type = hive_type_from_path(dfxml_hive_path, True)
        hive_sequence_name = hive_type_from_path(dfxml_hive_path, False)
        cursor.execute(
            "INSERT INTO hive_analysis(image_file, regxml_path, hive_file_path, hive_type, hive_sequence_name, mtime_file_system, atime_file_system, ctime_file_system, crtime_file_system) VALUES (?,?,?,?,?,?,?,?,?);",
            (work_order["image_file"], work_order["regxml_path"], dfxml_hive_path, hive_type, hive_sequence_name, work_order["mtime"], work_order["atime"], work_order["ctime"], work_order["crtime"]) )
        conn.commit()

        #Get hive id
        current_hive_id = None
        cursor.execute("SELECT * FROM hive_analysis WHERE rowid = ?;", (cursor.lastrowid,))
        current_rec = cursor.fetchone()
        current_hive_id = current_rec["hive_id"]
        if current_hive_id == None:
            raise ValueError("Couldn't get last hive_id, somehow.")

        #Get previous hive in sequence
        previous_hive_id = None
        if working_with_priors:
            #Note we're not using .get() - we want an error raised if we have a broken sequence.
            previous_image_file = image_sequence_priors[work_order["image_file"]]
            for r in cursor.execute("SELECT hive_id FROM hive_analysis WHERE image_file = ? AND hive_file_path = ?", (previous_image_file, work_order["dfxml_hive_path"])):
                previous_hive_id = r["hive_id"]
            cursor.execute("UPDATE hive_analysis SET previous_hive_in_sequence = ? WHERE hive_id = ?;", (previous_hive_id, current_hive_id))
        #Commit updates for hive_analysis
        conn.commit()

        #Process the RegXML into cell records, capturing notes on failure
        reader = None
        try:
            reader = dfxml.read_regxml(xmlfile=open(work_order["regxml_path"], "rb"), callback=lambda co: process_regxml_callback_object(co, current_hive_id, previous_hive_id, cursor))
        #NOTE(review): bare except also traps KeyboardInterrupt/SystemExit;
        #`except Exception:` would be safer here.
        except:
            sql_insert_failure = "INSERT INTO hives_failed(hive_id, cells_processed, error_text) VALUES (?, ?, ?);"
            cursor.execute(sql_insert_failure, (current_hive_id, hive_cell_proc_tallies[current_hive_id], traceback.format_exc()))
        conn.commit()  #Ensure the last updates made it in

        #Update the hive and image records with the necessarily-computed times
        if reader is not None:
            image_updates = {}
            hive_column_value_updates = {}
            hive_column_value_updates["mtime_hive_root"] = str(reader.registry_object.mtime())
            #Optional attributes are feature-tested before reading.
            if "mtime_latest_key" in dir(reader.registry_object):
                hive_column_value_updates["mtime_latest_key"] = str(reader.registry_object.mtime_latest_key)
            if "mtime_earliest_key" in dir(reader.registry_object):
                hive_column_value_updates["mtime_earliest_key"] = str(reader.registry_object.mtime_earliest_key)
            if "time_last_clean_shutdown" in dir(reader.registry_object):
                image_updates["last_clean_shutdown_time_hive"] = str(reader.registry_object.time_last_clean_shutdown)
            #Update tables
            update_db(conn, cursor, "hive_analysis", hive_column_value_updates, "hive_id", current_hive_id, True)
            update_db(conn, cursor, "image_anno", image_updates, "image_id", current_image_id, True)
        sys.stderr.write("Note: Just finished with hive %d.\n" % current_hive_id)
    #TODO Also add to the where clause that this should not run on Vista systems. This means digging for that key that notes where the system type is, I know Carvey noted it...

    #Now we have data...but possibly too much.
    cursor.execute("SELECT COUNT(*) FROM cell_analysis WHERE hive_id IN (SELECT hive_id FROM hives_failed);")
    row = cursor.fetchone()
    if row[0] > 0:
        sys.stderr.write("Note: Deleting %d rows from cell_analysis due to processing for hives failing.\n" % row[0])
        cursor.execute("DELETE FROM cell_analysis WHERE hive_id IN (SELECT hive_id FROM hives_failed);")
    #Now it's just right.
    cursor.close()
    conn.close()