コード例 #1
0
def main():
    """Read the RegXML file named on the command line and print a timeline.

    Relies on the module-level ``timeline`` list, populated by the
    ``process`` callback during parsing, then sorted and printed as
    tab-delimited records.
    """
    if len(sys.argv) < 2:
        print("Usage: {} <input.regxml>".format(sys.argv[0]))
        #sys.exit is the reliable form; the exit() builtin is only
        #provided by the site module.
        sys.exit(1)
    #Close the input file deterministically once parsing completes
    #(the original leaked the file handle).
    with open(sys.argv[1], "rb") as xmlfile:
        dfxml.read_regxml(xmlfile=xmlfile, callback=process)
    timeline.sort()
    for record in timeline:
        print("\t".join(map(str, record)))
コード例 #2
0
def main():
    """Parse the RegXML file given as argv[1] and emit a sorted timeline.

    The module-level ``process`` callback fills the module-level
    ``timeline`` list; each record is printed tab-delimited.
    """
    if len(sys.argv) < 2:
        print("Usage: {} <input.regxml>".format(sys.argv[0]))
        #Prefer sys.exit over the site-module exit() builtin.
        sys.exit(1)
    #Use a context manager so the input file is closed after parsing
    #(the original never closed it).
    with open(sys.argv[1], "rb") as xmlfile:
        dfxml.read_regxml(xmlfile=xmlfile, callback=process)
    timeline.sort()
    for record in timeline:
        print("\t".join(map(str, record)))
コード例 #3
0
ファイル: rdifference.py プロジェクト: pombredanne/bitcurator
 def process(self, fname):
     """Record *fname* as the file under analysis and parse it if RegXML.

     Bug fix: the original opened ``infile``, a name not defined in this
     scope, raising NameError for any ``.regxml`` input; the parameter
     actually holding the path is ``fname``.
     """
     self.current_fname = fname
     if fname.endswith(".regxml"):
         reader = dfxml.read_regxml(xmlfile=open(fname, 'rb'),
                                    callback=self.process_cell)
コード例 #4
0
ファイル: rdifference.py プロジェクト: simsong/dfxml
 def process(self, fname):
     """Note *fname* as the current input; parse it when it is a RegXML file.

     Non-RegXML inputs are only recorded, not parsed.
     """
     #Remember which file we are working on for downstream reporting.
     self.current_fname = fname
     if not fname.endswith(".regxml"):
         return
     reader = dfxml.read_regxml(xmlfile=open(fname, 'rb'),
                                callback=self.process_cell)
コード例 #5
0
def main():
    """Convert RegXML files from disk sequences into one SQLite database.

    Steps: parse the command line; optionally index the disk-image
    sequence manifest; map hive files to their successfully-produced
    RegXML files; then populate the image_anno, hive_analysis,
    hives_failed and cell_analysis tables of the (newly created)
    output database.
    """
    parser = argparse.ArgumentParser(
        prog="rx_make_database.py",
        description=
        "Convert RegXML files from disk sequences to a single SQLite database of Registry cells."
    )
    parser.add_argument(
        "successful_regxml_list",
        action="store",
        help=
        "The regxml list should only have regxml files from successfully completed producing processes (such as hivexml checked with xmllint).  Files should be given as absolute paths."
    )
    parser.add_argument(
        "hive_meta_list",
        action="store",
        help=
        "The hive meta list should have absolute paths to RegXML files, with each line containing a hive file absolute path, the hive's full in-image path as given in DFXML, and its maccr times (in that order)."
    )
    parser.add_argument("output_database_file",
                        action="store",
                        help="Output database must not exist.")
    parser.add_argument(
        "--drive_sequence_listing",
        required=False,
        action="store",
        help=
        "The drive sequence listing should have one line per drive image, and the following line being either the next image taken of that drive, or a blank line to indicate the drive's timeline is complete.  A sequence line should have two tab-delimited fields, first the image name, second the name of the image sequence."
    )
    parser.add_argument("--verbose",
                        action="store_true",
                        help="Enable verbose output.")
    args = parser.parse_args()
    if os.path.exists(args.output_database_file):
        parser.print_help()
        #sys.exit is the reliable form; exit() is site-module-provided.
        sys.exit(1)

    #Identify disk image sequences
    #Key: image base name.  Value: immediately-preceding image.
    image_sequence_priors = {}
    #Key: image base name.  Value: line number in the sequence file.
    image_sequence_numbers = {}
    #Key: image base name.  Value: image sequence name.
    image_sequence_names = {}
    working_with_priors = False

    #Populate disk image sequence index if optional parameter is passed
    if args.drive_sequence_listing is not None:
        working_with_priors = True
        image_sequences = [[]]

        with open(args.drive_sequence_listing, "r") as sequence_file:
            for (line_no, line) in enumerate(sequence_file):
                line_cleaned = line.strip()
                if line_cleaned == "":
                    #Blank line: the current drive's timeline is complete.
                    image_sequences.append([])
                else:
                    line_parts = line_cleaned.split("\t")
                    image_sequences[-1].append(line_parts[0])
                    image_sequence_numbers[line_parts[0]] = line_no
                    if len(line_parts) > 1:
                        image_sequence_names[line_parts[0]] = line_parts[1]
        for image_sequence in image_sequences:
            last_image = None
            for image in image_sequence:
                image_sequence_priors[image] = last_image
                last_image = image

    #Produce a list of the RegXML files that completed
    #List does double-duty as a map from a hive file to the regxml file derived from it.
    successful_regxmls = {}
    with open(args.successful_regxml_list, "r") as successful_regxml_file:
        for line in successful_regxml_file:
            cleaned_line = line.strip()
            #BUGFIX: a blank line strips/splits to [""] (length 1), never to
            #an empty list, so the old len()==0 skip branch was unreachable
            #and blank lines raised.  Skip them explicitly.
            if cleaned_line == "":
                continue
            cleaned_line_parts = cleaned_line.split("\t")
            if len(cleaned_line_parts) == 2:
                hive_path = cleaned_line_parts[0]
                xml_path = cleaned_line_parts[1]
            else:
                raise Exception(
                    "Unexpected number of line components when reading hive-regxml mapping:\nrepr(line) = "
                    + repr(line))
            successful_regxmls[hive_path] = xml_path
    if args.verbose:
        print("Successful hive file-RegXML pairs:")
        #BUGFIX: str.join requires strings; joining (hive, regxml) tuples
        #raised TypeError.  Format each pair tab-delimited instead.
        print("\n".join("%s\t%s" % (k, successful_regxmls[k])
                        for k in successful_regxmls))

    #Produce a list of the images to use
    work_list_unordered = []

    with open(args.hive_meta_list, "r") as image_list_file:
        for line in image_list_file:
            cleaned_line = line.strip()
            if cleaned_line != "":
                (hive_dump_path, image_file, dfxml_hive_path, hive_mtime,
                 hive_atime, hive_ctime,
                 hive_crtime) = cleaned_line.split("\t")
                if hive_dump_path in successful_regxmls:
                    regxml_path = successful_regxmls[hive_dump_path]
                    if working_with_priors:
                        #We want all the input drives to have a prior image or None explicitly specified.  So, don't use .get().
                        prior_image = image_sequence_priors[image_file]
                    else:
                        prior_image = None
                    work_list_unordered.append({
                        "regxml_path": regxml_path,
                        "dfxml_hive_path": dfxml_hive_path,
                        "image_file": image_file,
                        "prior_image": prior_image,
                        "mtime": hive_mtime,
                        "atime": hive_atime,
                        "ctime": hive_ctime,
                        "crtime": hive_crtime,
                        "image_sequence_number":
                        image_sequence_numbers.get(image_file)
                    })
    #Order by manifest listing.
    if working_with_priors:
        work_list = sorted(work_list_unordered,
                           key=itemgetter("image_sequence_number"))
    else:
        #Ingest order will do fine in the single-image case.
        work_list = work_list_unordered
    if args.verbose:
        print("In-order work list we are processing:")
        print("\n".join(map(str, work_list)))

    #Begin the SQL database
    conn = sqlite3.connect(args.output_database_file)
    conn.isolation_level = "EXCLUSIVE"
    conn.row_factory = sqlite3.Row
    cursor = conn.cursor()

    #Begin the SQL tables
    cursor.execute(SQL_CREATE_TABLE_IMAGEANNO)
    cursor.execute(SQL_CREATE_TABLE_HIVEANALYSIS)
    cursor.execute(SQL_CREATE_TABLE_HIVES_FAILED)
    cursor.execute(SQL_CREATE_TABLE_CELLANALYSIS)
    cursor.execute(SQL_CREATE_INDEX_CELLANALYSIS_FULLPATH)

    #Populate
    for work_order in work_list:
        current_image_id = None
        #Maybe make a new image record
        cursor.execute("SELECT * FROM image_anno WHERE name = ?",
                       (work_order["image_file"], ))
        for row in cursor:
            current_image_id = row["image_id"]
            break
        if current_image_id is None:
            #Create new record
            image_anno_new_record = {}

            #image name
            image_anno_new_record["name"] = work_order["image_file"]

            #image sequence name
            image_anno_new_record["sequence_name"] = image_sequence_names.get(
                image_anno_new_record["name"])

            #image sequence prior
            cursor.execute("SELECT image_id FROM image_anno WHERE name = ?",
                           (work_order["prior_image"], ))
            for row in cursor:
                image_anno_new_record["sequence_prior_image"] = row["image_id"]
                break

            #Insert
            insert_db(cursor, "image_anno", image_anno_new_record)
            conn.commit()

            #Fetch fresh id
            for row in cursor.execute(
                    "SELECT * FROM image_anno WHERE rowid = ?;",
                (cursor.lastrowid, )):
                current_image_id = row["image_id"]

        #Make a new hive record
        dfxml_hive_path = work_order["dfxml_hive_path"]
        hive_type = hive_type_from_path(dfxml_hive_path, True)
        hive_sequence_name = hive_type_from_path(dfxml_hive_path, False)
        cursor.execute(
            "INSERT INTO hive_analysis(image_file, regxml_path, hive_file_path, hive_type, hive_sequence_name, mtime_file_system, atime_file_system, ctime_file_system, crtime_file_system) VALUES (?,?,?,?,?,?,?,?,?);",
            (work_order["image_file"], work_order["regxml_path"],
             dfxml_hive_path, hive_type, hive_sequence_name,
             work_order["mtime"], work_order["atime"], work_order["ctime"],
             work_order["crtime"]))
        conn.commit()

        #Get hive id
        current_hive_id = None
        cursor.execute("SELECT * FROM hive_analysis WHERE rowid = ?;",
                       (cursor.lastrowid, ))
        current_rec = cursor.fetchone()
        current_hive_id = current_rec["hive_id"]
        if current_hive_id is None:
            raise ValueError("Couldn't get last hive_id, somehow.")

        #Get previous hive in sequence
        previous_hive_id = None
        if working_with_priors:
            #Note we're not using .get() - we want an error raised if we have a broken sequence.
            previous_image_file = image_sequence_priors[
                work_order["image_file"]]
            for r in cursor.execute(
                    "SELECT hive_id FROM hive_analysis WHERE image_file = ? AND hive_file_path = ?",
                (previous_image_file, work_order["dfxml_hive_path"])):
                previous_hive_id = r["hive_id"]
            cursor.execute(
                "UPDATE hive_analysis SET previous_hive_in_sequence = ? WHERE hive_id = ?;",
                (previous_hive_id, current_hive_id))

        #Commit updates for hive_analysis
        conn.commit()

        #Process the RegXML into cell records, capturing notes on failure
        reader = None
        try:
            #Close the RegXML file once parsing finishes (the original
            #leaked the handle).
            with open(work_order["regxml_path"], "rb") as regxml_file:
                reader = dfxml.read_regxml(
                    xmlfile=regxml_file,
                    callback=lambda co: process_regxml_callback_object(
                        co, current_hive_id, previous_hive_id, cursor))
        except Exception:
            #BUGFIX: was a bare except, which also swallowed SystemExit and
            #KeyboardInterrupt.
            sql_insert_failure = "INSERT INTO hives_failed(hive_id, cells_processed, error_text) VALUES (?, ?, ?);"
            cursor.execute(
                sql_insert_failure,
                (current_hive_id, hive_cell_proc_tallies[current_hive_id],
                 traceback.format_exc()))
        conn.commit()  #Ensure the last updates made it in

        #Update the hive and image records with the necessarily-computed times
        if reader is not None:
            image_updates = {}
            hive_column_value_updates = {}
            hive_column_value_updates["mtime_hive_root"] = str(
                reader.registry_object.mtime())
            if "mtime_latest_key" in dir(reader.registry_object):
                hive_column_value_updates["mtime_latest_key"] = str(
                    reader.registry_object.mtime_latest_key)
            if "mtime_earliest_key" in dir(reader.registry_object):
                hive_column_value_updates["mtime_earliest_key"] = str(
                    reader.registry_object.mtime_earliest_key)
            if "time_last_clean_shutdown" in dir(reader.registry_object):
                image_updates["last_clean_shutdown_time_hive"] = str(
                    reader.registry_object.time_last_clean_shutdown)

            #Update tables
            update_db(conn, cursor, "hive_analysis", hive_column_value_updates,
                      "hive_id", current_hive_id, True)
            update_db(conn, cursor, "image_anno", image_updates, "image_id",
                      current_image_id, True)
        sys.stderr.write("Note:  Just finished with hive %d.\n" %
                         current_hive_id)

    #TODO Also add to the where clause that this should not run on Vista systems.  This means digging for that key that notes where the system type is, I know Carvey noted it...

    #Now we have data...but possibly too much.
    cursor.execute(
        "SELECT COUNT(*) FROM cell_analysis WHERE hive_id IN (SELECT hive_id FROM hives_failed);"
    )
    row = cursor.fetchone()
    if row[0] > 0:
        sys.stderr.write(
            "Note:  Deleting %d rows from cell_analysis due to processing for hives failing.\n"
            % row[0])
    cursor.execute(
        "DELETE FROM cell_analysis WHERE hive_id IN (SELECT hive_id FROM hives_failed);"
    )

    #Now it's just right.
    cursor.close()
    conn.close()
コード例 #6
0
def main():
    """Convert RegXML files from disk sequences into one SQLite database.

    Steps: parse the command line; optionally index the disk-image sequence
    manifest; map hive files to their successfully-produced RegXML files;
    then populate the image_anno, hive_analysis, hives_failed and
    cell_analysis tables of the (newly created) output database.
    """
    parser = argparse.ArgumentParser(prog="rx_make_database.py", description="Convert RegXML files from disk sequences to a single SQLite database of Registry cells.")
    parser.add_argument("successful_regxml_list", action="store", help="The regxml list should only have regxml files from successfully completed producing processes (such as hivexml checked with xmllint).  Files should be given as absolute paths.")
    parser.add_argument("hive_meta_list", action="store", help="The hive meta list should have absolute paths to RegXML files, with each line containing a hive file absolute path, the hive's full in-image path as given in DFXML, and its maccr times (in that order).")
    parser.add_argument("output_database_file", action="store", help="Output database must not exist.")
    parser.add_argument("--drive_sequence_listing", required=False, action="store", help="The drive sequence listing should have one line per drive image, and the following line being either the next image taken of that drive, or a blank line to indicate the drive's timeline is complete.  A sequence line should have two tab-delimited fields, first the image name, second the name of the image sequence.")
    parser.add_argument("--verbose", action="store_true", help="Enable verbose output.")
    args = parser.parse_args()
    if os.path.exists(args.output_database_file):
        parser.print_help()
        #sys.exit is the reliable form; exit() is site-module-provided.
        sys.exit(1)

    #Identify disk image sequences
    #Key: image base name.  Value: immediately-preceding image.
    image_sequence_priors = {}
    #Key: image base name.  Value: line number in the sequence file.
    image_sequence_numbers = {}
    #Key: image base name.  Value: image sequence name.
    image_sequence_names = {}
    working_with_priors = False

    #Populate disk image sequence index if optional parameter is passed
    if args.drive_sequence_listing is not None:
        working_with_priors = True
        image_sequences = [[]]

        with open(args.drive_sequence_listing, "r") as sequence_file:
            for (line_no, line) in enumerate(sequence_file):
                line_cleaned = line.strip()
                if line_cleaned == "":
                    #Blank line: the current drive's timeline is complete.
                    image_sequences.append([])
                else:
                    line_parts = line_cleaned.split("\t")
                    image_sequences[-1].append(line_parts[0])
                    image_sequence_numbers[line_parts[0]] = line_no
                    if len(line_parts) > 1:
                        image_sequence_names[line_parts[0]] = line_parts[1]
        for image_sequence in image_sequences:
            last_image = None
            for image in image_sequence:
                image_sequence_priors[image] = last_image
                last_image = image

    #Produce a list of the RegXML files that completed
    #List does double-duty as a map from a hive file to the regxml file derived from it.
    successful_regxmls = {}
    with open(args.successful_regxml_list, "r") as successful_regxml_file:
        for line in successful_regxml_file:
            cleaned_line = line.strip()
            #BUGFIX: a blank line strips/splits to [""] (length 1), never an
            #empty list, so the old len()==0 skip branch was unreachable and
            #blank lines raised.  Skip them explicitly.
            if cleaned_line == "":
                continue
            cleaned_line_parts = cleaned_line.split("\t")
            if len(cleaned_line_parts) == 2:
                hive_path = cleaned_line_parts[0]
                xml_path = cleaned_line_parts[1]
            else:
                raise Exception("Unexpected number of line components when reading hive-regxml mapping:\nrepr(line) = " + repr(line))
            successful_regxmls[hive_path] = xml_path
    if args.verbose:
        print("Successful hive file-RegXML pairs:")
        #BUGFIX: str.join requires strings; joining (hive, regxml) tuples
        #raised TypeError.  Format each pair tab-delimited instead.
        print("\n".join("%s\t%s" % (k, successful_regxmls[k]) for k in successful_regxmls))

    #Produce a list of the images to use
    work_list_unordered = []

    with open(args.hive_meta_list, "r") as image_list_file:
        for line in image_list_file:
            cleaned_line = line.strip()
            if cleaned_line != "":
                hive_dump_path, image_file, dfxml_hive_path, hive_mtime, hive_atime, hive_ctime, hive_crtime = cleaned_line.split("\t")
                if hive_dump_path in successful_regxmls:
                    regxml_path = successful_regxmls[hive_dump_path]
                    if working_with_priors:
                        #We want all the input drives to have a prior image or None explicitly specified.  So, don't use .get().
                        prior_image = image_sequence_priors[image_file]
                    else:
                        prior_image = None
                    work_list_unordered.append({"regxml_path":regxml_path, "dfxml_hive_path":dfxml_hive_path, "image_file":image_file, "prior_image":prior_image, "mtime":hive_mtime, "atime":hive_atime, "ctime":hive_ctime, "crtime":hive_crtime, "image_sequence_number":image_sequence_numbers.get(image_file)})
    #Order by manifest listing.
    if working_with_priors:
        work_list = sorted(work_list_unordered, key=itemgetter("image_sequence_number"))
    else:
        #Ingest order will do fine in the single-image case.
        work_list = work_list_unordered
    if args.verbose:
        print("In-order work list we are processing:")
        print("\n".join(map(str, work_list)))

    #Begin the SQL database
    conn = sqlite3.connect(args.output_database_file)
    conn.isolation_level = "EXCLUSIVE"
    conn.row_factory = sqlite3.Row
    cursor = conn.cursor()

    #Begin the SQL tables
    cursor.execute(SQL_CREATE_TABLE_IMAGEANNO)
    cursor.execute(SQL_CREATE_TABLE_HIVEANALYSIS)
    cursor.execute(SQL_CREATE_TABLE_HIVES_FAILED)
    cursor.execute(SQL_CREATE_TABLE_CELLANALYSIS)
    cursor.execute(SQL_CREATE_INDEX_CELLANALYSIS_FULLPATH)

    #Populate
    for work_order in work_list:
        current_image_id = None
        #Maybe make a new image record
        cursor.execute("SELECT * FROM image_anno WHERE name = ?", (work_order["image_file"],))
        for row in cursor:
            current_image_id = row["image_id"]
            break
        if current_image_id is None:
            #Create new record
            image_anno_new_record = {}

            #image name
            image_anno_new_record["name"] = work_order["image_file"]

            #image sequence name
            image_anno_new_record["sequence_name"] = image_sequence_names.get(image_anno_new_record["name"])

            #image sequence prior
            cursor.execute("SELECT image_id FROM image_anno WHERE name = ?", (work_order["prior_image"],))
            for row in cursor:
                image_anno_new_record["sequence_prior_image"] = row["image_id"]
                break

            #Insert
            insert_db(cursor, "image_anno", image_anno_new_record)
            conn.commit()

            #Fetch fresh id
            for row in cursor.execute("SELECT * FROM image_anno WHERE rowid = ?;", (cursor.lastrowid,)):
                current_image_id = row["image_id"]

        #Make a new hive record
        dfxml_hive_path = work_order["dfxml_hive_path"]
        hive_type = hive_type_from_path(dfxml_hive_path, True)
        hive_sequence_name = hive_type_from_path(dfxml_hive_path, False)
        cursor.execute(
            "INSERT INTO hive_analysis(image_file, regxml_path, hive_file_path, hive_type, hive_sequence_name, mtime_file_system, atime_file_system, ctime_file_system, crtime_file_system) VALUES (?,?,?,?,?,?,?,?,?);",
            (work_order["image_file"], work_order["regxml_path"], dfxml_hive_path, hive_type, hive_sequence_name, work_order["mtime"], work_order["atime"], work_order["ctime"], work_order["crtime"])
        )
        conn.commit()

        #Get hive id
        current_hive_id = None
        cursor.execute("SELECT * FROM hive_analysis WHERE rowid = ?;", (cursor.lastrowid,))
        current_rec = cursor.fetchone()
        current_hive_id = current_rec["hive_id"]
        if current_hive_id is None:
            raise ValueError("Couldn't get last hive_id, somehow.")

        #Get previous hive in sequence
        previous_hive_id = None
        if working_with_priors:
            #Note we're not using .get() - we want an error raised if we have a broken sequence.
            previous_image_file = image_sequence_priors[work_order["image_file"]]
            for r in cursor.execute("SELECT hive_id FROM hive_analysis WHERE image_file = ? AND hive_file_path = ?", (previous_image_file, work_order["dfxml_hive_path"])):
                previous_hive_id = r["hive_id"]
            cursor.execute("UPDATE hive_analysis SET previous_hive_in_sequence = ? WHERE hive_id = ?;", (previous_hive_id, current_hive_id))

        #Commit updates for hive_analysis
        conn.commit()

        #Process the RegXML into cell records, capturing notes on failure
        reader = None
        try:
            #Close the RegXML file once parsing finishes (the original
            #leaked the handle).
            with open(work_order["regxml_path"], "rb") as regxml_file:
                reader = dfxml.read_regxml(xmlfile=regxml_file, callback=lambda co: process_regxml_callback_object(co, current_hive_id, previous_hive_id, cursor))
        except Exception:
            #BUGFIX: was a bare except, which also swallowed SystemExit and
            #KeyboardInterrupt.
            sql_insert_failure = "INSERT INTO hives_failed(hive_id, cells_processed, error_text) VALUES (?, ?, ?);"
            cursor.execute(sql_insert_failure, (current_hive_id, hive_cell_proc_tallies[current_hive_id], traceback.format_exc()))
        conn.commit() #Ensure the last updates made it in

        #Update the hive and image records with the necessarily-computed times
        if reader is not None:
            image_updates = {}
            hive_column_value_updates = {}
            hive_column_value_updates["mtime_hive_root"] = str(reader.registry_object.mtime())
            if "mtime_latest_key" in dir(reader.registry_object):
                hive_column_value_updates["mtime_latest_key"] = str(reader.registry_object.mtime_latest_key)
            if "mtime_earliest_key" in dir(reader.registry_object):
                hive_column_value_updates["mtime_earliest_key"] = str(reader.registry_object.mtime_earliest_key)
            if "time_last_clean_shutdown" in dir(reader.registry_object):
                image_updates["last_clean_shutdown_time_hive"] = str(reader.registry_object.time_last_clean_shutdown)

            #Update tables
            update_db(conn, cursor, "hive_analysis", hive_column_value_updates, "hive_id", current_hive_id, True)
            update_db(conn, cursor, "image_anno", image_updates, "image_id", current_image_id, True)
        sys.stderr.write("Note:  Just finished with hive %d.\n" % current_hive_id)

    #TODO Also add to the where clause that this should not run on Vista systems.  This means digging for that key that notes where the system type is, I know Carvey noted it...

    #Now we have data...but possibly too much.
    cursor.execute("SELECT COUNT(*) FROM cell_analysis WHERE hive_id IN (SELECT hive_id FROM hives_failed);")
    row = cursor.fetchone()
    if row[0] > 0:
        sys.stderr.write("Note:  Deleting %d rows from cell_analysis due to processing for hives failing.\n" % row[0])
    cursor.execute("DELETE FROM cell_analysis WHERE hive_id IN (SELECT hive_id FROM hives_failed);")

    #Now it's just right.
    cursor.close()
    conn.close()