def process_files(fn, ws):
    row_idx = [2]

    # Callback function to process the SAX stream
    def cb(fi):
        # Write one worksheet row per allocated, non-empty file that has an extension
        if fi.is_file() and fi.filesize():
            ext = fi.ext()
            if ext:
                build_local_wb(ws, fi, row_idx[0])
                row_idx[0] += 1

    '''
    ## NOTE: Original code preserved for reference and context. It was
    ## originally assumed that the xml file will have a .xml suffix.
    if fn.endswith('xml'):
        # We use this call if we're processing a fiwalk XML file
        fiwalk.fiwalk_using_sax(xmlfile=open(fn, 'rb'), callback=cb)
    else:
        # We use this call if we're processing a disk image
        fiwalk.fiwalk_using_sax(imagefile=open(fn, 'rb'), callback=cb)
    '''
    fiwalk.fiwalk_using_sax(xmlfile=open(fn, 'rb'), callback=cb)
def read_imagefile(self, fname):
    if args.nohash:
        fiwalk_args = "-z"
    else:
        fiwalk_args = "-zM"
    print("Reading file map by running fiwalk on {}".format(fname))
    fiwalk.fiwalk_using_sax(imagefile=open(fname, 'rb'),
                            callback=self.process,
                            fiwalk_args=fiwalk_args)
def process_files(fn, ws):
    row_idx = [2]

    # Callback function to process the SAX stream
    def cb(fi):
        # Write one worksheet row per allocated, non-empty file that has an extension
        if fi.is_file() and fi.filesize():
            ext = fi.ext()
            if ext:
                build_local_wb(ws, fi, row_idx[0])
                row_idx[0] += 1
        # Certain HFS volumes may have a "-" name_type. Check and continue:
        elif fi.name_type() == '-' and fi.meta_type() == '1':
            ext = fi.ext()
            if ext:
                build_local_wb(ws, fi, row_idx[0])
                row_idx[0] += 1

    '''
    ## NOTE: Original code preserved for reference and context. It was
    ## originally assumed that the xml file will have a .xml suffix.
    if fn.endswith('xml'):
        # We use this call if we're processing a fiwalk XML file
        fiwalk.fiwalk_using_sax(xmlfile=open(fn, 'rb'), callback=cb)
    else:
        # We use this call if we're processing a disk image
        fiwalk.fiwalk_using_sax(imagefile=open(fn, 'rb'), callback=cb)
    '''
    fiwalk.fiwalk_using_sax(xmlfile=open(fn, 'rb'), callback=cb)
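# A minimal, self-contained sketch of the SAX-callback pattern the snippets
# above use. Hedged: "report.xml" is a placeholder path and the counter is
# illustrative; only fiwalk_using_sax, is_file(), and filesize() are taken
# from the examples in this file. The one-element list is the same trick as
# row_idx above: it lets the nested callback mutate enclosing state without
# Python 3's nonlocal.
import fiwalk

count = [0]  # mutable cell so the nested callback can update the total

def count_cb(fi):
    if fi.is_file() and fi.filesize():
        count[0] += 1

with open("report.xml", "rb") as xmlfile:
    fiwalk.fiwalk_using_sax(xmlfile=xmlfile, callback=count_cb)
print("allocated, non-empty files:", count[0])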
def execute(self):
    if self.conf.get('commit'):
        logging.warning("Commit is ON. Will perform redactions...")
    else:
        logging.warning("Commit is OFF. Performing dry-run only...")
    if self.report_logger is not None:
        logtext = ''
        # for key, value in self.conf.items():
        #     logtext += key + ': ' + str(value) + ' '
        # self.report_logger.info(_({'config': logtext}))
    logging.debug('DEBUG OUTPUT IS ON')

    # Copy input_file to output_file
    if not self.output_file.closed:
        self.output_file.close()
    if not self.input_file.closed:
        self.input_file.close()
    shutil.copy(self.input_file.name, self.output_file.name)
    self.output_file = open(self.output_file.name, 'r+')
    self.input_file = open(self.input_file.name, 'r')
    fiwalk.fiwalk_using_sax(imagefile=self.output_file,
                            xmlfile=self.dfxml_file,
                            callback=self.process_file)
    self.close_files()
    logging.warning("files closed")
def process(self, fname):
    self.prior_fname = self.current_fname
    self.current_fname = fname
    if fname.endswith("xml"):
        with open(fname, 'rb') as xmlfile:
            for fi in dfxml.iter_dfxml(xmlfile, preserve_elements=True):
                self.process_fi(fi)
    else:
        fiwalk.fiwalk_using_sax(imagefile=open(fname, 'rb'),
                                flags=fiwalk.ALLOC_ONLY,
                                callback=self.process_fi)
def test_frag_find(img, args, inodes, test_runs=1):
    if not os.path.exists(img):
        raise RuntimeError(img + " does not exist")
    fns = []
    for inode in inodes:
        fn = "inode.%s" % str(inode)
        fns += [fn]
        if not os.path.exists(fn):
            cmd = ['icat']
            if args:
                cmd += args
            cmd += [img, str(inode)]
            call(cmd, stdout=open(fn, "wb"))
        else:
            print "%s already exists" % fn
    times = []
    pdfs = " ".join(["inode.%s" % str(inode) for inode in inodes])
    for i in range(0, test_runs):
        t0 = time.time()
        call(['/usr/bin/time', './frag_find', '-xoutput.xml', img] + fns)
        t1 = time.time()
        times.append(t1 - t0)
    times = times[1:]  # ignore the first (cache-warming) run

    # Now validate the results
    def process(fi):
        disk_md5 = hashlib.md5(file(fi.filename()).read()).hexdigest()
        image_md5 = hashlib.md5(fi.contents(imagefile=file(img))).hexdigest()
        print "validating ", fi.filename()
        print "XML MD5:   %s" % fi.md5()
        print "Disk MD5:  %s" % disk_md5
        print "Image MD5: %s" % image_md5
        if fi.md5() != disk_md5 or fi.md5() != image_md5:
            print "fi.md5()=", fi.md5()
            print "disk_md5=", disk_md5
            print "image_md5=", image_md5
            raise RuntimeError("*** Validation failed ***")

    fiwalk.fiwalk_using_sax(xmlfile=file("output.xml"), callback=process)

    if len(times) > 0:
        print ""
        print "========================="
        print "average time of %d trials (ignoring first): %f" % (
            len(times), sum(times) / len(times))
        print "========================="
def bc_process_xmlfile_using_sax(FiwalkReport, fn, prtn_info, glb_image_info, image_info):
    '''
    #
    # Callback function to process the SAX stream for volume object
    #
    def cbv(fv):
        prtn_info['partition_offset'] = fv.partition_offset()
        prtn_info['block_count'] = fv.block_count()
        prtn_info['last_block'] = fv.last_block()
        prtn_info['first_block'] = fv.first_block()
        prtn_info['block_size'] = fv.block_count()
        prtn_info['ftype'] = fv.ftype()
        prtn_info['ftype_str'] = fv.ftype_str()

        # glb_image_info.append(prtn_info)
        # NOTE: The above will overwrite the list with the new element as
        # every list element! Dumb!
        glb_image_info.append({prtn_info['partition_offset'], \
                               prtn_info['block_count'], prtn_info['last_block'], \
                               prtn_info['first_block'], prtn_info['block_size'], \
                               prtn_info['ftype'], prtn_info['ftype_str']})

        ## print("DEBUG:", glb_image_info)
        ## print("DEBUG: VolumeObject:", fv)
        ## print("DEBUG: Image Fileinfo: ", prtn_info)
    '''

    #
    # Callback function to process the SAX stream for file object
    #
    def cb(fi):
        # Form a list of dictionaries of the file attributes from the
        # xml file. Each dictionary represents one file (FiwalkReport.fiDictList)
        bc_make_dict(fi, FiwalkReport, fn)

    xmlfile = open(fn, 'rb')

    # Currently we support taking only an xml file as input. The following
    # check is for future enhancement.
    if fn.endswith('xml'):
        # We use this call if we're processing a fiwalk XML file
        fiwalk.fiwalk_using_sax(xmlfile=open(fn, 'rb'), callback=cb)
    else:
        # We use this call if we're processing a disk image
        fiwalk.fiwalk_using_sax(imagefile=open(fn, 'rb'), callback=cb)
def bc_process_xmlfile_using_sax(FiwalkReport, fn, prtn_info, glb_image_info, image_info):
    '''
    #
    # Callback function to process the SAX stream for volume object
    #
    def cbv(fv):
        prtn_info['partition_offset'] = fv.partition_offset()
        prtn_info['block_count'] = fv.block_count()
        prtn_info['last_block'] = fv.last_block()
        prtn_info['first_block'] = fv.first_block()
        prtn_info['block_size'] = fv.block_count()
        prtn_info['ftype'] = fv.ftype()
        prtn_info['ftype_str'] = fv.ftype_str()

        # glb_image_info.append(prtn_info)
        # NOTE: The above will overwrite the list with the new element as
        # every list element! Dumb!
        glb_image_info.append({prtn_info['partition_offset'], \
                               prtn_info['block_count'], prtn_info['last_block'], \
                               prtn_info['first_block'], prtn_info['block_size'], \
                               prtn_info['ftype'], prtn_info['ftype_str']})

        ## print("DEBUG:", glb_image_info)
        ## print("DEBUG: VolumeObject:", fv)
        ## print("DEBUG: Image Fileinfo: ", prtn_info)
    '''

    #
    # Callback function to process the SAX stream for file object
    #
    def cb(fi):
        # Form a list of dictionaries of the file attributes from the
        # xml file. Each dictionary represents one file (FiwalkReport.fiDictList)
        bc_make_dict(fi, FiwalkReport, fn)

    xmlfile = open(fn, 'rb')

    # We assume that we are processing a fiwalk XML file
    fiwalk.fiwalk_using_sax(xmlfile=open(fn, 'rb'), callback=cb)
def process_files(fn, ws):
    row_idx = [2]

    # Callback function to process the SAX stream
    def cb(fi):
        # Write one worksheet row per allocated, non-empty file that has an extension
        if fi.is_file() and fi.filesize():
            ext = fi.ext()
            if ext:
                build_local_wb(ws, fi, row_idx[0])
                row_idx[0] += 1

    if fn.endswith('xml'):
        # We use this call if we're processing a fiwalk XML file
        fiwalk.fiwalk_using_sax(xmlfile=open(fn, 'rb'), callback=cb)
    else:
        # We use this call if we're processing a disk image
        fiwalk.fiwalk_using_sax(imagefile=open(fn, 'rb'), callback=cb)
def execute(self):
    if self.conf.get('commit'):
        logging.info("Commit is ON. Will perform redactions..")
    else:
        logging.info("Commit is OFF. Performing dry-run only..")
    if self.report_logger is not None:
        self.report_logger.info('{')
        self.report_logger.info('"configuration": ')
        self.report_logger.info(_(self.kwargs))
        self.report_logger.info(',')
        self.report_logger.info('"redactions": [')
    logging.debug('DEBUG OUTPUT IS ON')

    # Copy input_file to output_file
    if not self.output_file.closed:
        self.output_file.close()
    if not self.input_file.closed:
        self.input_file.close()

    import time
    t0 = time.time()  # start a timer
    shutil.copy(self.input_file.name, self.output_file.name)
    self.output_file = open(self.output_file.name, 'r+')
    self.input_file = open(self.input_file.name, 'r')
    fiwalk.fiwalk_using_sax(imagefile=self.output_file,
                            xmlfile=self.dfxml_file,
                            callback=self.process_file)
    self.close_files()
    if self.redacted_count == 1:
        logging.info("Finished. 1 file was redacted.")
    else:
        logging.info("Finished. %d files were redacted." % self.redacted_count)
    elapsed = time.time() - t0
    logging.debug("Time to run: %d seconds" % elapsed)
    if self.report_logger is not None:
        self.report_logger.info('],')
        self.report_logger.info('"runtime": %d' % elapsed)
        self.report_logger.info('}')
def bc_process_xmlfile_using_sax(FiwalkReport, fn, image_info):
    #
    # Callback function to process the SAX stream for volume object
    #
    def cbv(fv):
        image_info['partition_offset'] = fv.partition_offset()
        image_info['block_count'] = fv.block_count()
        image_info['last_block'] = fv.last_block()
        image_info['first_block'] = fv.first_block()
        image_info['block_size'] = fv.block_size()
        image_info['ftype'] = fv.ftype()
        image_info['ftype_str'] = fv.ftype_str()
        ## print("DEBUG: VolumeObject:", fv)
        ## print("DEBUG: Image Fileinfo: ", image_info)

    #
    # Callback function to process the SAX stream for file object
    #
    def cb(fi):
        # Form a list of dictionaries of the file attributes from the
        # xml file. Each dictionary represents one file (FiwalkReport.fiDictList)
        bc_make_dict(fi, FiwalkReport, fn)

    xmlfile = open(fn, 'rb')

    # Currently we support taking only an xml file as input. The following
    # check is for future enhancement.
    if fn.endswith('xml'):
        # We use this call if we're processing a fiwalk XML file
        fiwalk.fiwalk_using_sax(xmlfile=open(fn, 'rb'), callback=cb)
        r = fiwalk.fiwalk_vobj_using_sax(xmlfile=open(fn, 'rb'), callback=cbv)
        image_info['image_filename'] = r.imageobject._tags['image_filename']
    else:
        # We use this call if we're processing a disk image
        fiwalk.fiwalk_using_sax(imagefile=open(fn, 'rb'), callback=cb)
def count_xml(self, prog, xmlfile):
    log.debug("count_xml(prog=%r, xmlfile=_)" % prog)
    try:
        fiwalk.fiwalk_using_sax(xmlfile=open(xmlfile, "rb"), callback=self.process_fi)
    except:
        pass
    log.debug("count_xml: self.failed=%r" % self.failed)

    # Accumulate staged stats into succeeded or failed pile;
    # TODO This might not be the best volume counting.
    log.debug("count_xml: self.stats_staging = %r" % self.stats_staging)
    if self.failed:
        self.roll_stats(prog, self.stats_missed)
        log.debug("count_xml: self.stats_missed=%r" % self.stats_missed)
        self.stats_missed["images/" + prog] += 1
        self.stats_missed["volumes/" + prog] += len(self.volumes)
    else:
        self.roll_stats(prog, self.stats_summary)
        log.debug("count_xml: self.stats_summary=%r" % self.stats_summary)
        self.stats_summary["images/" + prog] += 1
        self.stats_summary["volumes/" + prog] += len(self.volumes)

    # Reset staging state.
    self.stats_staging = collections.defaultdict(lambda: 0)
    self.volumes = set()
    self.failed = False
def process_files(fn):
    drive_files = {}   # index of drives
    all_parts = []
    all_files = []
    files_by_md5 = {}  # a dictionary of sets of fiobject, indexed by md5
    extension_len_histogram = histogram2d()
    extension_fragments_histogram = histogram2d()
    partition_histogram = histogram2d()

    def cb(fi):
        # add the md5 to the set
        if fi.is_file() and fi.filesize():
            files_by_md5.setdefault(fi.md5(), set()).add(fi)
            ext = fi.ext()
            if not ext:
                print fi.meta_type(), fi
            extension_len_histogram.add(ext, fi.filesize())
            extension_fragments_histogram.add(ext, fi.fragments())
            partition_histogram.add(fi.partition(), fi.filesize())

    if fn.endswith('xml'):
        fiwalk.fiwalk_using_sax(xmlfile=open(fn), callback=cb)
    else:
        fiwalk.fiwalk_using_sax(imagefile=open(fn), callback=cb)

    #
    # Typeset the information
    #
    tab = ttable()
    tab.header = "File extension popularity and average size (suppressing 0-len files)"
    tab.col_headings = [['Ext', 'Count', 'Average Size', 'Max', 'Std Dev']]
    tab.omit_row = [[0, '']]
    extension_len_histogram.statcol = ['iaverage', 'maxx', 'istddev']
    print extension_len_histogram.typeset(tab=tab)

    #
    # Information about fragmentation patterns
    #
    tab = ttable()
    tab.header = "Fragmentation pattern by file system and file type:"
    tab.col_headings = [['Ext', 'Count', 'Average Size', 'Max', 'Std Dev']]
    tab.omit_row = [[0, '']]
    extension_fragments_histogram.statcol = ['iaverage', 'maxx', 'istddev']
    print extension_fragments_histogram.typeset(tab=tab)
    exit(0)

    for fstype in fstypes:
        for ftype in ['jpg', 'pdf', 'doc', 'txt']:
            len1stats = statbag()
            len2stats = statbag()
            delta_hist = histogram()
            delta_re = re.compile(r"(\d+)\-?(\d+)? ?(\d+)\-?(\d+)?")
            for i in filter(lambda f: f.ext() == ftype and f.fragments == 2, all_files):
                runs = False
                if hasattr(i, 'block_runs'):
                    runs = i.block_runs
                if hasattr(i, 'sector_runs'):
                    runs = i.sector_runs
                if not runs:
                    continue
                m = delta_re.search(runs)
                r = []
                for j in range(1, 5):
                    try:
                        r.append(int(m.group(j)))
                    except TypeError:
                        r.append(int(m.group(j - 1)))
                len1 = r[1] - r[0] + 1
                len2 = r[3] - r[2] + 1
                delta = r[2] - r[1]
                len1stats.addx(len1)
                len2stats.addx(len2)
                delta_hist.add(delta)
            if len1stats.count() > 0:
                print "\n\n"
                print "fstype:", fstype, " ftype:", ftype
                print "len1 average: %f stddev: %f" % (len1stats.average(), len1stats.stddev())
                print "len2 average: %f stddev: %f" % (len2stats.average(), len2stats.stddev())
                print "delta average: %f" % delta_hist.average()
                print "delta histogram:"
                delta_hist.print_top(10)
def read_xmlfile(self, fname):
    if fname.endswith(".xml"):
        fiwalk.fiwalk_using_sax(xmlfile=open(fname, 'rb'), callback=self.process)
    else:
        fiwalk.fiwalk_using_sax(imagefile=open(fname, 'rb'), callback=self.process)
def ingest_dfxml(self, fname):
    fiwalk.fiwalk_using_sax(xmlfile=open(fname, 'rb'),
                            flags=fiwalk.ALLOC_ONLY,
                            callback=self.process_fi)
sys.stdout.write("Done.\n") sys.stdout.write("Creating copy of byte-runs-esque table for indexing...") #There isn't currently an index for the byte runs in TSK. outcur.execute(""" CREATE TABLE indexed_tsk_file_layout AS SELECT *, byte_start + byte_len AS byte_end FROM tsk.tsk_file_layout ORDER BY byte_start, byte_end, obj_id; """) outcur.execute("CREATE INDEX itfl_start ON indexed_tsk_file_layout(byte_start);") outcur.execute("CREATE INDEX itfl_end ON indexed_tsk_file_layout(byte_end);") if args.verbose: sys.stdout.write("Done.\n") sys.stdout.flush() #Process DFXML with open(args.dfxml, "rb") as xmlfh: fiwalk.fiwalk_using_sax(xmlfile=xmlfh, callback=proc_dfxml) if args.verbose: sys.stdout.write("Done.\n") #Cleanup outconn.commit() outcur.execute("DETACH DATABASE tsk;") outcur.close() tskcur.close()
        fi.filename(), fi.mtime(), fi.atime(), fi.ctime(), fi.crtime()])))
    outfile = open(outfilename, "wb")
    outfile.write(fi.contents())
    outfile.close()
    if hivexml_command:
        command_string = hivexml_command + " " + outfilename + " >" + outfilename + ".regxml" + " 2>" + outfilename + ".err.log"
        sysrc = os.system(command_string)
        if sysrc:
            sys.stderr.write("Error, see err.log: " + command_string + "\n")

if __name__ == "__main__":
    global hivexml_command
    global imageabspath
    parser = argparse.ArgumentParser(description="Find registry files in imagefile and dump hives to files in pwd in the order they're encountered, with a manifest printed to stdout.")
    parser.add_argument("-x", "--xml", dest="dfxml_file_name",
                        help="Already-created DFXML file for imagefile")
    parser.add_argument("--hivexml", dest="hivexml_command", action="store_const",
                        const="hivexml", default="",
                        help="Run hivexml command on each hive, producing output at <hive>.regxml, stderr at <hive>.err.log")
    parser.add_argument("imagefilename", help="Image file")
    args = parser.parse_args()
    hivexml_command = args.hivexml_command
    xmlfh = None
    if args.dfxml_file_name is not None:
        xmlfh = open(args.dfxml_file_name, "rb")
    imageabspath = os.path.abspath(args.imagefilename)
    fiwalk.fiwalk_using_sax(imagefile=open(imageabspath, "r"), xmlfile=xmlfh, callback=proc_dfxml)
def process_featurefile(args, report, featurefile):
    # Counters for the summary report
    global file_count
    features = featuredb()
    unallocated_count = 0
    feature_count = 0
    features_compressed = 0
    located_count = 0
    unicode_encode_errors = 0
    unicode_decode_errors = 0
    file_count = 0

    ofn = os.path.join(args.outdir, ("annotated_" + featurefile))
    if os.path.exists(ofn):
        raise RuntimeError(ofn + " exists")
    of = open(ofn, "wb")

    # First read the feature files
    print("Adding features from " + featurefile)
    try:
        linenumber = 0
        for line in report.open(featurefile, mode='rb'):
            # Read the file in binary and convert to unicode if possible
            linenumber += 1
            if bulk_extractor_reader.is_comment_line(line):
                continue
            try:
                fset = features.add_featurefile_line(line[0:-1])
                feature_count += 1
                if (b"ZIP" in fset[0]) or (b"HIBER" in fset[0]):
                    features_compressed += 1
                del fset
            except ValueError:
                raise RuntimeError("Line {} in feature file {} is invalid: {}".format(
                    linenumber, featurefile, line))
    except IOError:
        print("Error: Failed to open feature file '%s'" % featurefile)
        exit(1)

    if args.debug:
        print('')
        features.print_debug()

    # feature2fi maps each feature to the file in which it was found
    feature2fi = {}

    ################################################################
    # If we got features in the featuredb, find out the file that each one
    # came from by scanning all of the files and, for each byte run, noting
    # the features that fall within that byte run.
    if features.count() > 0:
        def process(fi):
            global file_count
            file_count += 1
            if args.verbose or args.debug:
                print("%d %s (%d fragments)" % (file_count, fi.filename(), fi.fragments()))
            for run in fi.byte_runs():
                for (offset, fset) in features.search(run):
                    if args.debug:
                        print("  run={} offset={} fset={} ".format(run, offset, fset))
                    # for each of those features, note that it is in this file
                    feature2fi[findex(fset)] = fi
            if file_count % 1000 == 0:
                print("Processed %d fileobjects in DFXML file" % file_count)

        xmlfile = None
        if args.xmlfile:
            xmlfile = args.xmlfile
        else:
            if args.imagefile:
                imagefile = args.imagefile
            else:
                imagefile = report.imagefile()
            # See if there is an xmlfile
            (root, ext) = os.path.splitext(imagefile)
            possible_xmlfile = root + ".xml"
            if os.path.exists(possible_xmlfile):
                xmlfile = possible_xmlfile
        if xmlfile:
            print("Using XML file " + xmlfile)
            fiwalk.fiwalk_using_sax(xmlfile=open(xmlfile, 'rb'), callback=process)
        else:
            print("Running fiwalk on " + imagefile)
            fiwalk.fiwalk_using_sax(imagefile=open(imagefile, 'rb'), callback=process)
    else:
        print("No features found; copying feature file")

    ################################################################
    print("Generating output...")

    # Now print all of the features
    if args.terse:
        of.write(b"# Position\tFeature\tFilename\n")
    else:
        of.write(b"# Position\tFeature\tContext\tFilename\tFile MD5\n")
    for (offset, fset) in features:
        try:
            of.write(fset[0])   # pos
            of.write(b"\t")
            of.write(fset[1])   # feature
            of.write(b"\t")
            try:
                if not args.terse:
                    of.write(fset[2])  # context
            except IndexError:
                pass            # no context
            try:
                fi = feature2fi[findex(fset)]
                of.write(b"\t")
                if fi.filename():
                    of.write(fi.filename().encode('utf-8'))
                if args.debug:
                    print("pos=", offset, "feature=", fset[1], "fi=", fi,
                          "fi.filename=", fi.filename())
                if not args.terse:
                    of.write(b"\t")
                    if fi.md5():
                        of.write(fi.md5().encode('utf-8'))
                located_count += 1
            except KeyError:
                unallocated_count += 1
                pass            # cannot locate
            of.write(b"\n")
        except UnicodeEncodeError:
            unicode_encode_errors += 1
            of.write(b"\n")
        except UnicodeDecodeError:
            unicode_decode_errors += 1
            of.write(b"\n")

    # Stop the timer used to calculate the total run time
    t1 = time.time()

    # Summary report
    for (title, value) in [
            ["# Total features input: {}", feature_count],
            ["# Total features located to files: {}", located_count],
            ["# Total features in unallocated space: {}", unallocated_count],
            ["# Total features in compressed regions: {}", features_compressed],
            ["# Unicode Encode Errors: {}", unicode_encode_errors],
            ["# Unicode Decode Errors: {}", unicode_decode_errors],
            ["# Total processing time: {:.2} seconds", t1 - t0]]:
        of.write((title + "\n").format(value).encode('utf-8'))
#!/usr/bin/python
"""Usage: igrep imagefile.iso string ...

Reports the files that contain the string."""

import fiwalk, dfxml

if __name__ == "__main__":
    import sys
    from optparse import OptionParser
    parser = OptionParser()
    parser.usage = '%prog [options] image.iso s1'
    parser.add_option("-d", "--debug", help="debug", action="store_true")
    (options, args) = parser.parse_args()
    if len(args) != 2:
        parser.print_help()
        sys.exit(1)
    (imagefn, data) = args

    def process(fi):
        offset = fi.contents().find(data)
        if offset >= 0:   # find() returns -1 when the string is absent
            print "%s (offset=%d)" % (fi.filename(), offset)

    fiwalk.fiwalk_using_sax(imagefile=open(imagefn), callback=process)
def bcProcessDfxmlFileUsingSax(self, dfxmlfile):
    fiwalk.fiwalk_using_sax(xmlfile=open(dfxmlfile, 'rb'), callback=self.cb)
def read_xmlfile(self, fname):
    print("Reading file map from XML file {}".format(fname))
    fiwalk.fiwalk_using_sax(xmlfile=open(fname, 'rb'), callback=self.process)
def process_files(fn):
    drive_files = {}   # index of drives
    all_parts = []
    all_files = []
    files_by_md5 = {}  # a dictionary of sets of fiobject, indexed by md5
    extension_len_histogram = histogram2d()
    extension_fragments_histogram = histogram2d()
    partition_histogram = histogram2d()

    def cb(fi):
        # add the md5 to the set
        if fi.is_file() and fi.filesize():
            files_by_md5.setdefault(fi.md5(), set()).add(fi)
            ext = fi.ext()
            if not ext:
                print(fi.meta_type(), fi)
            extension_len_histogram.add(ext, fi.filesize())
            extension_fragments_histogram.add(ext, fi.fragments())
            partition_histogram.add(fi.partition(), fi.filesize())

    if fn.endswith('xml'):
        fiwalk.fiwalk_using_sax(xmlfile=open(fn), callback=cb)
    else:
        fiwalk.fiwalk_using_sax(imagefile=open(fn), callback=cb)

    #
    # Typeset the information
    #
    tab = ttable()
    tab.header = "File extension popularity and average size (suppressing 0-len files)"
    tab.col_headings = [['Ext', 'Count', 'Average Size', 'Max', 'Std Dev']]
    tab.omit_row = [[0, '']]
    extension_len_histogram.statcol = ['iaverage', 'maxx', 'istddev']
    print(extension_len_histogram.typeset(tab=tab))

    #
    # Information about fragmentation patterns
    #
    tab = ttable()
    tab.header = "Fragmentation pattern by file system and file type:"
    tab.col_headings = [['Ext', 'Count', 'Average Size', 'Max', 'Std Dev']]
    tab.omit_row = [[0, '']]
    extension_fragments_histogram.statcol = ['iaverage', 'maxx', 'istddev']
    print(extension_fragments_histogram.typeset(tab=tab))
    exit(0)

    for fstype in fstypes:
        for ftype in ['jpg', 'pdf', 'doc', 'txt']:
            len1stats = statbag()
            len2stats = statbag()
            delta_hist = histogram()
            delta_re = re.compile(r"(\d+)\-?(\d+)? ?(\d+)\-?(\d+)?")
            for i in filter(lambda f: f.ext() == ftype and f.fragments == 2, all_files):
                runs = False
                if hasattr(i, 'block_runs'):
                    runs = i.block_runs
                if hasattr(i, 'sector_runs'):
                    runs = i.sector_runs
                if not runs:
                    continue
                m = delta_re.search(runs)
                r = []
                for j in range(1, 5):
                    try:
                        r.append(int(m.group(j)))
                    except TypeError:
                        r.append(int(m.group(j - 1)))
                len1 = r[1] - r[0] + 1
                len2 = r[3] - r[2] + 1
                delta = r[2] - r[1]
                len1stats.addx(len1)
                len2stats.addx(len2)
                delta_hist.add(delta)
            if len1stats.count() > 0:
                print("\n\n")
                print("fstype:", fstype, " ftype:", ftype)
                print("len1 average: %f stddev: %f" % (len1stats.average(), len1stats.stddev()))
                print("len2 average: %f stddev: %f" % (len2stats.average(), len2stats.stddev()))
                print("delta average: %f" % delta_hist.average())
                print("delta histogram:")
                delta_hist.print_top(10)
    exit(0)

    print("Partition histogram:")
    partition_histogram.print_top(n=100)
    print("Counts by extension:")
    extension_len_histogram.print_top(n=100)
    print("Fragments by extension:")
    extension_fragments_histogram.print_top(n=100)
    exit(0)

    for fstype in fstypes:
        if fstype == '(unrecognized)':
            continue
        print(fstype, "Partitions:")

        def isfstype(x):
            return x.fstype == fstype

        these_parts = filter(isfstype, all_parts)
        these_files = []
        for part in these_parts:
            these_files.extend(part.files)
        print(fragmentation_table(these_files))
    exit(0)

    sys.exit(0)

    #
    # Typeset information about file extensions
    #
    hist_exts = histogram2d()
    hist_exts.topn = 20
    for i in all_files:
        if i.size > 0 and i.fragments > 0:
            hist_exts.add(i.ext(), i.size)
    tab = table()
    tab.header = "File extension popularity and average size (suppressing 0-len files)"
    tab.col_headings = ['Ext', 'Count', 'Average Size', 'Max', 'Std Dev']
    tab.omit_row = [[0, '']]
    hist_exts.statcol = ['iaverage', 'maxx', 'istddev']
    print(hist_exts.typeset(t=tab))

    hist_exts = histogram2d()
    hist_exts.topn = 20
    for i in all_files:
        if i.fragments > 0:
            hist_exts.add(i.ext(), i.fragments)
    tab = table()
    tab.header = "Fragmentation by file extension (suppressing files with 0 fragments)"
    tab.col_headings = ['Ext', 'Count', 'Avg Fragments', 'Max', 'Std Dev']
    tab.omit_row = [[0, '']]
    hist_exts.statcol = ['average', 'maxx', 'stddev']
    print(hist_exts.typeset(t=tab))
    print("===========================")

    #
    # Typeset the File Systems on Drives table
    #
    tab = table()
    tab.header = "File Systems on Drives"
    tab.col_headings = ["FS Type", "Drives", "MBytes"]
    tab.col_totals = [1, 2]
    fstypeh.statcol = 'sumx'
    print(fstypeh.typeset(t=tab))

    #
    # Typeset overall fragmentation stats
    #
    print(fragmentation_table(all_files))
import fiwalk, math

total = 0
total2 = 0
count = 0

def func(fi):
    global total, total2, count
    if fi.ext() == 'txt':
        total += fi.filesize()
        total2 += fi.filesize() ** 2
        count += 1

fiwalk.fiwalk_using_sax(imagefile=open("small.dmg"), callback=func)
print "count=", count
print "average=", total / count
# Population std dev via the identity Var(X) = E[X^2] - (E[X])^2
print "stddev=", math.sqrt(total2 / count - (total / count) ** 2)
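# Quick check of the identity the snippet above relies on:
# population variance = E[x^2] - (E[x])^2, which is why it accumulates both
# the sum and the sum of squares in a single pass. The sample values are
# illustrative; this check is written for Python 3.
import math
import statistics

xs = [3.0, 7.0, 7.0, 19.0]
mean = sum(xs) / len(xs)
var_identity = sum(x * x for x in xs) / len(xs) - mean ** 2
assert math.isclose(statistics.pvariance(xs), var_identity)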
if __name__ == "__main__": import sys from optparse import OptionParser from sys import stdout parser = OptionParser() parser.usage = '%prog [options] (xmlfile or imagefile)' (options, args) = parser.parse_args() if not args: parser.print_usage() exit(1) sizes = [] dates = {} def callback(fi): sizes.append(fi.filesize()) for (tag, val) in (fi.times().iteritems()): date = val.datetime() dates[date] = dates.get(date, 0) + 1 fn = args[0] if fn.endswith(".xml"): fiwalk.fiwalk_using_sax(xmlfile=open(fn), callback=callback) else: fiwalk.fiwalk_using_sax(imagefile=open(fn), callback=callback) print("Here is the dates array:") for d in sorted(dates.keys()): print("{} {}".format(d, dates[d]))
def process(self, fname):
    self.current_fname = fname
    if fname.endswith(".xml"):
        fiwalk.fiwalk_using_sax(xmlfile=open(fname),
                                flags=fiwalk.ALLOC_ONLY,
                                callback=self.process_fi)
    else:
        fiwalk.fiwalk_using_sax(imagefile=open(fname),
                                flags=fiwalk.ALLOC_ONLY,
                                callback=self.process_fi)
if len(args)<1: parser.print_help() sys.exit(1) imagefile = open(args[0],"r") annotated_runs = [] if options.debug: print("Read %d file objects from %s" % (len(fileobjects),imagefile.name)) def cb(fi): if options.debug: print("Read "+str(fi)) fragment_num = 1 for run in fi.byte_runs(): annotated_runs.append((run.img_offset,run,fragment_num,fi)) fragment_num += 1 fiwalk.fiwalk_using_sax(imagefile=imagefile,callback=cb) next_sector = 0 for (ip,run,fragment_num,fi) in sorted(annotated_runs): extra = "" fragment = "" start_sector = run.img_offset/512 sector_count = int(run.bytes/512) partial = run.bytes % 512 if not fi.allocated(): print("***") if not fi.file_present(): # it's not here! continue
import time
import fiwalk

if __name__ == "__main__":
    import sys
    from optparse import OptionParser
    from sys import stdout

    parser = OptionParser()
    parser.usage = '%prog [options] xmlfile'
    (options, args) = parser.parse_args()

    sizes = []
    dates = {}

    def callback(fi):
        sizes.append(fi.filesize())
        for (tag, val) in fi.times().iteritems():
            date = val.datetime()
            dates[date] = dates.get(date, 0) + 1

    fiwalk.fiwalk_using_sax(xmlfile=open(args[0], "r"), callback=callback)

    try:
        import pylab
        pylab.grid()
        pylab.hist(sizes, 100)   # histogram of the collected file sizes
        pylab.show()
    except ImportError:
        print("pylab not installed.")

    print("Date\tActivity Count:")
    for date in sorted(dates.keys()):
        print("%s\t%d" % (date, dates[date]))
if __name__=="__main__": from optparse import OptionParser parser = OptionParser() parser.add_option("-x", "--xml", dest="xmlfilename", help="Already-created DFXML file for imagefile") parser.usage = '%prog [options] imagefile zipfile [x1 x2 x3]\nFind files x1, x2, x3 ... in imagefile and write to zipfile' (options,args) = parser.parse_args() if len(args)<3: parser.print_help() exit(1) imagefilename = args[0] xmlfilename = options.xmlfilename xmlfh = None if xmlfilename != None: xmlfh = open(xmlfilename, "r") zipfilename = args[1] targets = set([fn.lower() for fn in args[2:]]) zfile = zipfile.ZipFile(zipfilename,"w",allowZip64=True) def proc(fi): basename = os.path.basename(fi.filename()).lower() if basename in targets: info = zipfile.ZipInfo(fi.filename(),datetime.datetime.fromtimestamp(fi.mtime().timestamp()).utctimetuple()) info.internal_attr = 1 info.external_attr = 2175008768 # specifies mode 0644 zfile.writestr(info,fi.contents()) fiwalk.fiwalk_using_sax(imagefile=open(imagefilename), xmlfile=xmlfh, callback=proc)
from optparse import OptionParser
from subprocess import Popen, PIPE
from glob import glob
global options, xml_out

parser = OptionParser()
parser.usage = "%prog [options] imagefile"
parser.add_option("-d", "--debug", help="prints debugging info", dest="debug", action="store_true")
parser.add_option("-c", "--commit", help="Really do the redaction", action="store_true")
parser.add_option("--all", help="Do all", action="store_true")
(options, args) = parser.parse_args()

# First read all of the redaction files
for fn in glob("*redacted.xml*"):
    try:
        fiwalk.fiwalk_using_sax(xmlfile=open(fn),
                                callback=lambda fi: redact_filenames.add(fi.filename()))
    except xml.parsers.expat.ExpatError:
        print "Invalid XML file:", fn
print "number of filenames in redaction XML:", len(redact_filenames)

if options.all:
    for fn in glob("*.aff"):
        raw = fn.replace(".aff", ".raw")
        if not os.path.exists(raw):
            print "%s --> %s" % (fn, raw)
            if call(['afconvert', '-e', 'raw', fn]) != 0:
                raise RuntimeError("afconvert of %s failed" % fn)
    fns = glob("*.raw")
else:
    fns = args
def main():
    fiwalk.fiwalk_using_sax(imagefile=file(IMAGE), callback=index_fobj)
    SOLR.commit()
        self.image_file.close()
        if self.xml_file and not self.xml_file.closed:
            print "closing file %s" % self.xml_file.name
            self.xml_file.close()

if __name__ == "__main__":
    import sys, time
    from optparse import OptionParser
    from subprocess import Popen, PIPE
    global options

    parser = OptionParser()
    parser.usage = "%prog [options] config-file"
    parser.add_option("-d", "--debug", help="prints debugging info", dest="debug")
    (options, args) = parser.parse_args()
    t0 = time.time()

    # Read the redaction configuration file
    rc = RedactConfig(args[0])
    if not rc.imagefile:
        print "Error: a filename must be specified in the redaction config file"
        sys.exit(1)
    fiwalk.fiwalk_using_sax(imagefile=rc.imagefile, xmlfile=rc.xmlfile, callback=rc.process_file)
    t1 = time.time()
    rc.close_files()
    print "Time to run: %d seconds" % (t1 - t0)
if __name__ == "__main__": import sys, time from optparse import OptionParser from subprocess import Popen, PIPE global options parser = OptionParser() parser.usage = "%prog [options] config-file" parser.add_option("-d", "--debug", help="prints debugging info", dest="debug") (options, args) = parser.parse_args() t0 = time.time() # Read the redaction configuration file rc = RedactConfig(args[0]) if not rc.imagefile: print "Error: a filename must be specified in the redaction config file" sys.exit(1) fiwalk.fiwalk_using_sax(imagefile=rc.imagefile, xmlfile=rc.xmlfile, callback=rc.process_file) t1 = time.time() rc.close_files() print "Time to run: %d seconds" % (t1 - t0)
if __name__=="__main__": import sys from optparse import OptionParser from sys import stdout parser = OptionParser() parser.usage = '%prog [options] (xmlfile or imagefile)' (options,args) = parser.parse_args() if not args: parser.print_usage() exit(1) sizes = [] dates = {} def callback(fi): sizes.append(fi.filesize()) for (tag,val) in (fi.times().iteritems()): date = val.datetime() dates[date] = dates.get(date,0)+1 fn = args[0] if fn.endswith(".xml"): fiwalk.fiwalk_using_sax(xmlfile=open(fn),callback=callback) else: fiwalk.fiwalk_using_sax(imagefile=open(fn),callback=callback) print("Here is the dates array:") for d in sorted(dates.keys()): print("{} {}".format(d,dates[d]))
"--debug", help="prints debugging info", dest="debug", action="store_true") parser.add_option("-c", "--commit", help="Really do the redaction", action="store_true") parser.add_option("--all", help="Do all", action="store_true") (options, args) = parser.parse_args() # First read all of the redaction files for fn in glob("*redacted.xml*"): try: fiwalk.fiwalk_using_sax( xmlfile=open(fn), callback=lambda fi: redact_filenames.add(fi.filename())) except xml.parsers.expat.ExpatError: print "Invalid XML file:", fn print "number of filenames in redaction XML:", len(redact_filenames) if options.all: for fn in glob("*.aff"): raw = fn.replace(".aff", ".raw") if not os.path.exists(raw): print "%s --> %s" % (fn, raw) if call(['afconvert', '-e', 'raw', fn]) != 0: raise RuntimeError, "afconvert of %s failed" % fn fns = glob("*.raw") else: fns = args
# output is to stdout
outfile = sys.stdout

# Find partition information: blocksize and filesystem.
# The 1st partition has number 1, to correspond to fiwalk output.
partitioncounter = 0
f_out.write("********************************** PARTITIONS **********************************")
f_out.write("\nNo\tBlocksize\tFilesystem\n")
for line in f:
    if re.search("block_size", line):
        partitioncounter += 1
        f_out.write(str(partitioncounter))
        f_out.write("\t")
        f_out.write(re.split(">|<", line)[2])
    if re.search("ftype_str", line):
        f_out.write("\t\t")
        f_out.write(re.split(">|<", line)[2])
        f_out.write("\n")
f_out.write("\n\n************************************* DATA *************************************\n")
f_out.write("Partition\tFilename\tSize\tTimestamps\n")
f.close()

# Re-open the file for binary reading, then process it
f = open(file_name, "rb")
fiwalk.fiwalk_using_sax(xmlfile=f, callback=proc)
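# Hedged alternative to regex-scanning the raw XML for partition facts: the
# volume-object callback (fiwalk_vobj_using_sax, as used in
# bc_process_xmlfile_using_sax above) delivers the same fields as parsed
# values. "fiwalk_report.xml" is a placeholder; only accessors that appear
# elsewhere in this file are used here.
import fiwalk

def print_volume(fv):
    print(fv.partition_offset(), fv.block_count(), fv.ftype_str())

with open("fiwalk_report.xml", "rb") as xmlfh:
    fiwalk.fiwalk_vobj_using_sax(xmlfile=xmlfh, callback=print_volume)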