예제 #1
0
def process_files(fn, ws):
    """Walk the fiwalk DFXML file *fn* and write one worksheet row per
    file entry into *ws* via build_local_wb().

    The input is always treated as DFXML (passed through ``xmlfile=``),
    so fiwalk itself is not executed.
    """
    # Mutable one-element list so the nested callback can advance the
    # row counter; row 1 is the header, data starts at row 2.
    row_idx = [2]

    # Callback invoked by the SAX parser for every fileobject.
    def cb(fi):
        # Only regular, non-empty files that have an extension are recorded.
        if fi.is_file() and fi.filesize():
            ext = fi.ext()
            if ext:
                build_local_wb(ws, fi, row_idx[0])
                row_idx[0] += 1

    # NOTE: earlier revisions dispatched on fn.endswith('xml') with an
    # imagefile= fallback; the input is now always DFXML.
    # Use a context manager so the handle is closed after parsing
    # (the original leaked it).
    with open(fn, 'rb') as xmlfh:
        fiwalk.fiwalk_using_sax(xmlfile=xmlfh, callback=cb)
예제 #2
0
 def read_imagefile(self,fname):
     """Run fiwalk over the disk image *fname*, streaming every file
     object to self.process."""
     # Flag selection mirrors the global --nohash option; "-zM"
     # presumably enables hashing — confirm against fiwalk's CLI help.
     fiwalk_args = "-z" if args.nohash else "-zM"
     print("Reading file map by running fiwalk on {}".format(fname))
     image_handle = open(fname,'rb')
     fiwalk.fiwalk_using_sax(imagefile=image_handle,callback=self.process,fiwalk_args=fiwalk_args)
예제 #3
0
def process_files(fn, ws):
    """Walk the fiwalk DFXML file *fn* and write one worksheet row per
    qualifying file entry into *ws* via build_local_wb().

    Entries qualify when they are regular non-empty files, or HFS-style
    entries with name_type "-" and meta_type "1".
    """
    # Mutable cell so the nested callback can advance the row counter;
    # row 1 is the header, data starts at row 2.
    row_idx = [2]

    def _record(fi):
        # Shared helper: emit a row for fi if it has an extension.
        ext = fi.ext()
        if ext:
            build_local_wb(ws, fi, row_idx[0])
            row_idx[0] += 1

    # Callback to process the SAX stream.
    def cb(fi):
        if fi.is_file() and fi.filesize():
            _record(fi)
        # Certain HFS volumes may have a "-" name_type. Check and continue.
        # BUG FIX: meta_type is a method; the original compared the bound
        # method object itself to '1', which is always False and made this
        # entire branch dead code.
        elif fi.name_type() == '-' and fi.meta_type() == '1':
            _record(fi)

    # NOTE: earlier revisions dispatched on fn.endswith('xml'); the input
    # is now always DFXML. Context manager closes the previously-leaked handle.
    with open(fn, 'rb') as xmlfh:
        fiwalk.fiwalk_using_sax(xmlfile=xmlfh, callback=cb)
예제 #4
0
    def execute(self):
        """Copy input_file to output_file, then walk the DFXML and run
        self.process_file over every file object, redacting the output
        copy in place.

        Both files are reopened here: the output as 'r+' so redactions
        can be written back, the input read-only.
        """
        if self.conf.get('commit'):
            logging.warning("Commit is ON. Will perform redactions..")
        else:
            logging.warning("Commit is OFF. Performing dry-run only..")
        if self.report_logger is not None:
            logtext = ''
            # for key, value in self.conf.items():
            # logtext += key + ': ' + str(value) + '  '
            # self.report_logger.info(_({'config': logtext}))
        logging.debug('DEBUG OUTPUT IS ON')
        # Copy input_file to output_file; both must be closed first so the
        # copy sees flushed, consistent contents.
        if not self.output_file.closed:
            self.output_file.close()
        if not self.input_file.closed:
            self.input_file.close()
        shutil.copy(self.input_file.name, self.output_file.name)
        self.output_file = open(self.output_file.name, 'r+')
        self.input_file = open(self.input_file.name, 'r')

        fiwalk.fiwalk_using_sax(
            imagefile=self.output_file,
            xmlfile=self.dfxml_file,
            callback=self.process_file)
        self.close_files()
        # FIX: logging.warn is a deprecated alias; use logging.warning
        # (consistent with the calls at the top of this method).
        logging.warning("files closed")
예제 #5
0
    def execute(self):
        """Copy input_file to output_file, then walk the DFXML and run
        self.process_file over every file object, redacting the output
        copy in place.

        Both files are reopened here: the output as 'r+' so redactions
        can be written back, the input read-only.
        """
        if self.conf.get('commit'):
            logging.warning("Commit is ON. Will perform redactions..")
        else:
            logging.warning("Commit is OFF. Performing dry-run only..")
        if self.report_logger is not None:
            logtext = ''
            # for key, value in self.conf.items():
            # logtext += key + ': ' + str(value) + '  '
            # self.report_logger.info(_({'config': logtext}))
        logging.debug('DEBUG OUTPUT IS ON')
        # Copy input_file to output_file; both must be closed first so the
        # copy sees flushed, consistent contents.
        if not self.output_file.closed:
            self.output_file.close()
        if not self.input_file.closed:
            self.input_file.close()
        shutil.copy(self.input_file.name, self.output_file.name)
        self.output_file = open(self.output_file.name, 'r+')
        self.input_file = open(self.input_file.name, 'r')

        fiwalk.fiwalk_using_sax(imagefile=self.output_file,
                                xmlfile=self.dfxml_file,
                                callback=self.process_file)
        self.close_files()
        # FIX: logging.warn is a deprecated alias; use logging.warning
        # (consistent with the calls at the top of this method).
        logging.warning("files closed")
예제 #6
0
 def process(self,fname):
     """Process *fname*: iterate its DFXML directly when it is an XML
     file, otherwise run fiwalk over it as a disk image (allocated
     files only). Each file object goes to self.process_fi."""
     self.prior_fname = self.current_fname
     self.current_fname = fname
     if fname.endswith("xml"):
         with open(fname,'rb') as xmlfile:
             for fi in dfxml.iter_dfxml(xmlfile, preserve_elements=True):
                 self.process_fi(fi)
     else:
         # FIX: close the image handle when fiwalk is done (the original
         # leaked it; the xml branch already used a context manager).
         with open(fname,'rb') as imgfile:
             fiwalk.fiwalk_using_sax(imagefile=imgfile, flags=fiwalk.ALLOC_ONLY, callback=self.process_fi)
예제 #7
0
 def read_imagefile(self, fname):
     """Build the file map by running fiwalk on the disk image *fname*
     and dispatching each file object to self.process."""
     # "-zM" presumably adds hashing; selection follows the global
     # --nohash option — confirm against fiwalk's CLI help.
     fiwalk_args = "-z" if args.nohash else "-zM"
     print("Reading file map by running fiwalk on {}".format(fname))
     image_handle = open(fname, 'rb')
     fiwalk.fiwalk_using_sax(imagefile=image_handle,
                             callback=self.process,
                             fiwalk_args=fiwalk_args)
예제 #8
0
 def process(self, fname):
     """Process *fname*: iterate its DFXML directly when it is an XML
     file, otherwise run fiwalk over it as a disk image (allocated
     files only). Each file object goes to self.process_fi."""
     self.prior_fname = self.current_fname
     self.current_fname = fname
     if fname.endswith("xml"):
         with open(fname, 'rb') as xmlfile:
             for fi in dfxml.iter_dfxml(xmlfile, preserve_elements=True):
                 self.process_fi(fi)
     else:
         # FIX: close the image handle when fiwalk is done (the original
         # leaked it; the xml branch already used a context manager).
         with open(fname, 'rb') as imgfile:
             fiwalk.fiwalk_using_sax(imagefile=imgfile,
                                     flags=fiwalk.ALLOC_ONLY,
                                     callback=self.process_fi)
예제 #9
0
def test_frag_find(img, args, inodes, test_runs=1):
    """Benchmark and validate ./frag_find against disk image *img*.

    Python 2 code (print statements, file(), ``raise E, msg`` form).

    Steps:
      1. Extract each inode in *inodes* to ./inode.<n> with sleuthkit's
         icat, reusing extracts that already exist.
      2. Run ./frag_find *test_runs* times, timing each run; results go
         to output.xml.
      3. For every file object in output.xml, require three MD5s to
         agree: the XML-reported hash, the extracted file's hash, and
         the hash of the bytes recovered from the image.
    """
    if not os.path.exists(img):
        raise RuntimeError, img + " does not exist"

    fns = []
    for inode in inodes:
        # One extract file per inode, named inode.<number>.
        fn = "inode.%s" % str(inode)
        fns += [fn]
        if not os.path.exists(fn):
            cmd = ['icat']
            if args:
                cmd += args
            cmd += [img, str(inode)]
            call(cmd, stdout=open(fn, "wb"))
        else:
            print "%s already exists" % fn

    times = []
    # NOTE(review): pdfs is computed but never used.
    pdfs = " ".join(["inode.%s" % str(inode) for inode in inodes])
    for i in range(0, test_runs):
        t0 = time.time()
        call(['/usr/bin/time', './frag_find', '-xoutput.xml', img] + fns)
        t1 = time.time()
        times.append(t1 - t0)  # ignore the first time

    # Now validate the results
    def process(fi):
        # Three MD5s must agree: XML-reported, extracted-file, and the
        # bytes frag_find located inside the image.
        disk_md5 = hashlib.md5(file(fi.filename()).read()).hexdigest()
        image_md5 = hashlib.md5(fi.contents(imagefile=file(img))).hexdigest()
        print "validating ", fi.filename()
        print "XML MD5:   %s" % fi.md5()
        print "Disk MD5:  %s" % disk_md5
        print "Image MD5: %s" % image_md5
        if fi.md5() != disk_md5 or fi.md5() != image_md5:
            print "fi.md5()=", fi.md5()
            print "disk_md5=", disk_md5
            print "image_md5=", image_md5
            raise RuntimeError, "*** Validation failed***"

    fiwalk.fiwalk_using_sax(xmlfile=file("output.xml"), callback=process)

    if len(times) > 0:
        # NOTE(review): despite the labels, the average below includes the
        # first run — nothing is actually discarded.
        print ""
        print "========================="
        print "average time of %d trials (ignoring first): %f:" % (
            len(times), sum(times) / len(times))
        print "========================="
예제 #10
0
def test_frag_find(img,args,inodes,test_runs=1):
    """Benchmark and validate ./frag_find against disk image *img*.

    Python 2 code. Extracts each inode with icat (reusing existing
    extracts), times *test_runs* runs of ./frag_find (XML written to
    output.xml), then checks that each file's XML-reported MD5 matches
    both the extracted file and the bytes recovered from the image.
    """
    if not os.path.exists(img):
        raise RuntimeError,img+" does not exist"

    fns = []
    for inode in inodes:
        # One extract file per inode, named inode.<number>.
        fn = "inode.%s" % str(inode)
        fns += [fn]
        if not os.path.exists(fn):
            cmd = ['icat']
            if args:
                cmd += args
            cmd += [img,str(inode)]
            call(cmd,stdout=open(fn,"wb"))
        else:
            print "%s already exists" % fn

    times = []
    # NOTE(review): pdfs is computed but never used.
    pdfs = " ".join(["inode.%s" % str(inode) for inode in inodes])
    for i in range(0,test_runs):
        t0=time.time()
        call(['/usr/bin/time','./frag_find','-xoutput.xml',img] + fns)
        t1=time.time()
        times.append(t1-t0)             # ignore the first time

    # Now validate the results
    def process(fi):
        # Three MD5s must agree: XML-reported, extracted-file, and the
        # bytes frag_find located inside the image.
        disk_md5 = hashlib.md5(file(fi.filename()).read()).hexdigest()
        image_md5 = hashlib.md5(fi.contents(imagefile=file(img))).hexdigest()
        print "validating ",fi.filename()
        print "XML MD5:   %s" % fi.md5()
        print "Disk MD5:  %s" % disk_md5
        print "Image MD5: %s" % image_md5
        if fi.md5()!=disk_md5 or fi.md5()!=image_md5:
            print "fi.md5()=",fi.md5()
            print "disk_md5=",disk_md5
            print "image_md5=",image_md5
            raise RuntimeError, "*** Validation failed***"

    fiwalk.fiwalk_using_sax(xmlfile=file("output.xml"),callback=process)

    if len(times)>0:
        # NOTE(review): despite the label, the average below includes the
        # first run — nothing is actually discarded.
        print ""
        print "========================="
        print "average time of %d trials (ignoring first): %f:" % (len(times),sum(times)/len(times))
        print "========================="
예제 #11
0
def bc_process_xmlfile_using_sax(FiwalkReport, fn, prtn_info, glb_image_info, image_info):
    """Parse *fn* with fiwalk's SAX interface and populate
    FiwalkReport.fiDictList with one dict per file object.

    DFXML inputs (``*.xml``) are parsed directly; anything else is run
    through fiwalk as a disk image. prtn_info, glb_image_info and
    image_info are accepted for interface compatibility but are unused
    here — a retired volume-object callback consumed them (see version
    control history).
    """
    # Callback to process the SAX stream for each file object.
    def cb(fi):
        # One dictionary of file attributes per file object, accumulated
        # on FiwalkReport (FiwalkReport.fiDictList).
        bc_make_dict(fi, FiwalkReport, fn)

    # FIX: the original also opened a second, never-used 'xmlfile' handle
    # that leaked; open the input once and close it via the context manager.
    with open(fn, 'rb') as fh:
        # Currently we support taking only xml file as input. The check
        # below is for future enhancement.
        if fn.endswith('xml'):
            # We use this call if we're processing a fiwalk XML file
            fiwalk.fiwalk_using_sax(xmlfile=fh, callback=cb)
        else:
            # We use this call if we're processing a disk image
            fiwalk.fiwalk_using_sax(imagefile=fh, callback=cb)
예제 #12
0
def bc_process_xmlfile_using_sax(FiwalkReport, fn, prtn_info, glb_image_info,
                                 image_info):
    """Parse *fn* with fiwalk's SAX interface and populate
    FiwalkReport.fiDictList with one dict per file object.

    DFXML inputs (``*.xml``) are parsed directly; anything else is run
    through fiwalk as a disk image. prtn_info, glb_image_info and
    image_info are accepted for interface compatibility but are unused
    here — a retired volume-object callback consumed them (see version
    control history).
    """
    # Callback to process the SAX stream for each file object.
    def cb(fi):
        # One dictionary of file attributes per file object, accumulated
        # on FiwalkReport (FiwalkReport.fiDictList).
        bc_make_dict(fi, FiwalkReport, fn)

    # FIX: the original also opened a second, never-used 'xmlfile' handle
    # that leaked; open the input once and close it via the context manager.
    with open(fn, 'rb') as fh:
        # Currently we support taking only xml file as input. The check
        # below is for future enhancement.
        if fn.endswith('xml'):
            # We use this call if we're processing a fiwalk XML file
            fiwalk.fiwalk_using_sax(xmlfile=fh, callback=cb)
        else:
            # We use this call if we're processing a disk image
            fiwalk.fiwalk_using_sax(imagefile=fh, callback=cb)
예제 #13
0
def bc_process_xmlfile_using_sax(FiwalkReport, fn, prtn_info, glb_image_info, image_info):
    """Parse the fiwalk DFXML file *fn* and populate
    FiwalkReport.fiDictList with one dict per file object.

    The input is assumed to be fiwalk XML. prtn_info, glb_image_info
    and image_info are accepted for interface compatibility but are
    unused here — a retired volume-object callback consumed them (see
    version control history).
    """
    # Callback to process the SAX stream for each file object.
    def cb(fi):
        # One dictionary of file attributes per file object, accumulated
        # on FiwalkReport (FiwalkReport.fiDictList).
        bc_make_dict(fi, FiwalkReport, fn)

    # We assume that we are processing a fiwalk XML file.
    # FIX: the original also opened a second, never-used 'xmlfile' handle
    # that leaked; open the input once and close it via the context manager.
    with open(fn, 'rb') as fh:
        fiwalk.fiwalk_using_sax(xmlfile=fh, callback=cb)

    ''' 
예제 #14
0
def process_files(fn, ws):
    """Write one worksheet row per regular, non-empty, extension-bearing
    file entry of *fn* into *ws* via build_local_wb().

    *fn* may be a fiwalk DFXML file (``*xml``) or a disk image; images
    are run through fiwalk.
    """
    # Mutable cell so the nested callback can advance the row counter;
    # row 1 is the header, data starts at row 2.
    row_idx = [2]

    # Callback function to process the SAX stream
    def cb(fi):
        if fi.is_file() and fi.filesize():
            ext = fi.ext()
            if ext:
                build_local_wb(ws, fi, row_idx[0])
                row_idx[0] += 1

    # FIX: close the input handle after parsing (the original leaked it).
    with open(fn, 'rb') as fh:
        if fn.endswith('xml'):
            # Processing a fiwalk XML file
            fiwalk.fiwalk_using_sax(xmlfile=fh, callback=cb)
        else:
            # Processing a disk image
            fiwalk.fiwalk_using_sax(imagefile=fh, callback=cb)
예제 #15
0
def process_files(fn, ws):
    """Write one worksheet row per regular, non-empty, extension-bearing
    file entry of *fn* into *ws* via build_local_wb().

    *fn* may be a fiwalk DFXML file (``*xml``) or a disk image; images
    are run through fiwalk.
    """
    # Mutable cell so the nested callback can advance the row counter;
    # row 1 is the header, data starts at row 2.
    row_idx = [2]

    # Callback function to process the SAX stream
    def cb(fi):
        if fi.is_file() and fi.filesize():
            ext = fi.ext()
            if ext:
                build_local_wb(ws, fi, row_idx[0])
                row_idx[0] += 1

    # FIX: close the input handle after parsing (the original leaked it).
    with open(fn, 'rb') as fh:
        if fn.endswith('xml'):
            # Processing a fiwalk XML file
            fiwalk.fiwalk_using_sax(xmlfile=fh, callback=cb)
        else:
            # Processing a disk image
            fiwalk.fiwalk_using_sax(imagefile=fh, callback=cb)
예제 #16
0
def bc_process_xmlfile_using_sax(FiwalkReport, fn, prtn_info, glb_image_info,
                                 image_info):
    """Parse the fiwalk DFXML file *fn* and populate
    FiwalkReport.fiDictList with one dict per file object.

    The input is assumed to be fiwalk XML. prtn_info, glb_image_info
    and image_info are accepted for interface compatibility but are
    unused here — a retired volume-object callback consumed them (see
    version control history).
    """
    # Callback to process the SAX stream for each file object.
    def cb(fi):
        # One dictionary of file attributes per file object, accumulated
        # on FiwalkReport (FiwalkReport.fiDictList).
        bc_make_dict(fi, FiwalkReport, fn)

    # We assume that we are processing a fiwalk XML file.
    # FIX: the original also opened a second, never-used 'xmlfile' handle
    # that leaked; open the input once and close it via the context manager.
    with open(fn, 'rb') as fh:
        fiwalk.fiwalk_using_sax(xmlfile=fh, callback=cb)
    ''' 
예제 #17
0
    def execute(self):
        """Copy the input image to the output path, then walk its DFXML
        and run self.process_file over every file object, applying
        redactions to the output copy in place. Also emits a JSON-style
        run report through self.report_logger when one is configured.
        """
        if self.conf.get('commit'):
            logging.info("Commit is ON. Will perform redactions..")
        else:
            logging.info("Commit is OFF. Performing dry-run only..")
        if self.report_logger is not None:
            # Open the report object and start the "redactions" array;
            # the matching closers are written at the end of this method.
            self.report_logger.info('{')
            self.report_logger.info('"configuration": ')
            self.report_logger.info(_(self.kwargs))
            self.report_logger.info(',')
            self.report_logger.info('"redactions": [')
        logging.debug('DEBUG OUTPUT IS ON')
        # Copy input_file to output_file; both handles must be closed
        # first so the copy sees flushed, consistent contents.
        if not self.output_file.closed:
            self.output_file.close()
        if not self.input_file.closed:
            self.input_file.close()
        import time
        t0 = time.time()  # start a timer
        shutil.copy(self.input_file.name, self.output_file.name)
        # Reopen: output read/write so redactions can be written back,
        # input read-only.
        self.output_file = open(self.output_file.name, 'r+')
        self.input_file = open(self.input_file.name, 'r')

        fiwalk.fiwalk_using_sax(
            imagefile=self.output_file,
            xmlfile=self.dfxml_file,
            callback=self.process_file)
        self.close_files()

        if self.redacted_count == 1:
            logging.info("Finished. 1 file was redacted.")
        else:
            logging.info("Finished. %d files were redacted." % self.redacted_count)
        elapsed = time.time() - t0
        logging.debug("Time to run: %d seconds" % elapsed)
        if self.report_logger is not None:
            # Close the "redactions" array and the report object.
            self.report_logger.info('],')
            self.report_logger.info('"runtime": %d' % elapsed)
            self.report_logger.info('}')
예제 #18
0
def bc_process_xmlfile_using_sax(FiwalkReport, fn, image_info):
    """Parse *fn* with fiwalk's SAX interface.

    File objects are turned into dicts on FiwalkReport.fiDictList; for
    DFXML inputs the volume object is also parsed and its geometry
    recorded into *image_info*, along with the source image filename.
    """
    # Callback to process the SAX stream for the volume object.
    def cbv(fv):
        image_info['partition_offset'] = fv.partition_offset()
        image_info['block_count'] = fv.block_count()
        image_info['last_block'] = fv.last_block()
        image_info['first_block'] = fv.first_block()
        # NOTE(review): 'block_size' is populated from block_count() —
        # looks like a copy/paste slip, but preserved; confirm whether
        # fv.block_size() was intended.
        image_info['block_size'] = fv.block_count()
        image_info['ftype'] = fv.ftype()
        image_info['ftype_str'] = fv.ftype_str()

    # Callback to process the SAX stream for each file object.
    def cb(fi):
        # One dictionary of file attributes per file object, accumulated
        # on FiwalkReport (FiwalkReport.fiDictList).
        bc_make_dict(fi, FiwalkReport, fn)

    # FIX: the original opened an extra, never-used 'xmlfile' handle that
    # leaked; all handles below are now closed via context managers.
    # Currently we support taking only xml file as input. The check below
    # is for future enhancement.
    if fn.endswith('xml'):
        # Processing a fiwalk XML file: one pass for file objects, one
        # for the volume object.
        with open(fn, 'rb') as fh:
            fiwalk.fiwalk_using_sax(xmlfile=fh, callback=cb)
        with open(fn, 'rb') as fh:
            r = fiwalk.fiwalk_vobj_using_sax(xmlfile=fh, callback=cbv)
        image_info['image_filename'] = r.imageobject._tags['image_filename']
    else:
        # Processing a disk image
        with open(fn, 'rb') as fh:
            fiwalk.fiwalk_using_sax(imagefile=fh, callback=cb)
예제 #19
0
 def count_xml(self,prog,xmlfile):
     """Parse *xmlfile* with fiwalk's SAX reader, then fold the staged
     statistics into the success or failure pile for program *prog*
     and reset the staging state."""
     log.debug("count_xml(prog=%r, xmlfile=_)" % prog)
     try:
         fiwalk.fiwalk_using_sax(xmlfile=open(xmlfile, "rb"), callback=self.process_fi)
     except Exception:
         # Best-effort parse: failures surface below via self.failed.
         # FIX: was a bare 'except:', which also swallowed
         # KeyboardInterrupt and SystemExit.
         pass
     log.debug("count_xml: self.failed=%r" % self.failed)
     #Accumulate staged stats into succeeded or failed pile;
     #TODO This might not be the best volume counting.
     log.debug("count_xml: self.stats_staging = %r" % self.stats_staging)
     if self.failed:
         self.roll_stats(prog, self.stats_missed)
         log.debug("count_xml: self.stats_missed=%r" % self.stats_missed)
         self.stats_missed["images/" + prog] += 1
         self.stats_missed["volumes/" + prog] += len(self.volumes)
     else:
         self.roll_stats(prog, self.stats_summary)
         log.debug("count_xml: self.stats_summary=%r" % self.stats_summary)
         self.stats_summary["images/" + prog] += 1
         self.stats_summary["volumes/" + prog] += len(self.volumes)
     #Reset staging state.
     self.stats_staging = collections.defaultdict(lambda: 0)
     self.volumes = set()
     self.failed = False
예제 #20
0
        sys.exit(1)

    imagefile = open(args[0], "r")
    annotated_runs = []
    if options.debug:
        print("Read %d file objects from %s" %
              (len(fileobjects), imagefile.name))

    def cb(fi):
        # SAX callback: flatten every byte run of every file object into
        # annotated_runs as (image offset, run, fragment ordinal,
        # fileobject) tuples so they can later be sorted by disk position.
        if options.debug: print("Read " + str(fi))
        fragment_num = 1
        for run in fi.byte_runs():
            annotated_runs.append((run.img_offset, run, fragment_num, fi))
            fragment_num += 1

    fiwalk.fiwalk_using_sax(imagefile=imagefile, callback=cb)

    next_sector = 0

    for (ip, run, fragment_num, fi) in sorted(annotated_runs):
        extra = ""
        fragment = ""
        start_sector = run.img_offset / 512
        sector_count = int(run.bytes / 512)
        partial = run.bytes % 512

        if not fi.allocated():
            print("***")

        if not fi.file_present():  # it's not here!
            continue
예제 #21
0
def process_files(fn):
    """Gather per-extension and per-partition statistics from *fn* (a
    DFXML file or a disk image) and typeset them as tables.

    Python 2 code (print statements, tuple-unpacking lambda).
    """
    drive_files = {}  # index of drives
    all_parts = []
    all_files = []
    files_by_md5 = {}  # a dictionary of sets of fiobject, indexed by md5
    extension_len_histogram = histogram2d()
    extension_fragments_histogram = histogram2d()
    partition_histogram = histogram2d()

    def cb(fi):
        # add the md5 to the set
        # NOTE(review): dict.get returns the default without storing it,
        # so the set built here is discarded and files_by_md5 stays
        # empty; fi.md5 is also not called (elsewhere it is fi.md5()).
        # Probably intended: files_by_md5.setdefault(fi.md5(), set()).add(fi)
        if fi.is_file() and fi.filesize():
            files_by_md5.get(fi.md5, set()).add(fi)
            ext = fi.ext()
            if not ext: print fi.meta_type(), fi
            extension_len_histogram.add(ext, fi.filesize())
            extension_fragments_histogram.add(ext, fi.fragments())
            partition_histogram.add(fi.partition(), fi.filesize())

    # DFXML files are parsed directly; anything else is run through fiwalk.
    if fn.endswith('xml'):
        fiwalk.fiwalk_using_sax(xmlfile=open(fn), callback=cb)
    else:
        fiwalk.fiwalk_using_sax(imagefile=open(fn), callback=cb)

    #
    # Typeset the information
    #

    tab = ttable()
    tab.header = "File extension popularity and average size (suppressing 0-len files)"
    tab.col_headings = [['Ext', 'Count', 'Average Size', 'Max', 'Std Dev']]
    tab.omit_row = [[0, '']]
    extension_len_histogram.statcol = ['iaverage', 'maxx', 'istddev']
    print extension_len_histogram.typeset(tab=tab)

    #
    # Information about fragmentation patterns
    #
    tab = ttable()
    tab.header = "Fragmentation pattern by file system and file type:"
    tab.col_headings = [['Ext', 'Count', 'Average Size', 'Max', 'Std Dev']]
    tab.omit_row = [[0, '']]
    extension_fragments_histogram.statcol = ['iaverage', 'maxx', 'istddev']
    print extension_fragments_histogram.typeset(tab=tab)
    # NOTE(review): everything below this exit(0) is dead code.
    exit(0)

    for fstype in fstypes:
        for ftype in ['jpg', 'pdf', 'doc', 'txt']:
            len1stats = statbag()
            len2stats = statbag()
            delta_hist = histogram()
            # Parses two sector-run spans: "a-b c-d" (ends optional).
            delta_re = re.compile("(\d+)\-?(\d+)? ?(\d+)\-?(\d+)?")
            for i in filter((lambda
                             (f): f.ext() == ftype and f.fragments == 2),
                            all_files):
                runs = False
                if (hasattr(i, 'block_runs')): runs = i.block_runs
                if (hasattr(i, 'sector_runs')): runs = i.sector_runs
                if not runs: continue
                m = delta_re.search(runs)
                r = []
                for j in range(1, 5):
                    try:
                        r.append(int(m.group(j)))
                    except TypeError:
                        # Missing group: fall back to the previous one
                        # (single-sector run like "a" instead of "a-b").
                        r.append(int(m.group(j - 1)))

                # Fragment lengths and the gap between the two fragments.
                len1 = r[1] - r[0] + 1
                len2 = r[3] - r[2] + 1
                delta = r[2] - r[1]

                len1stats.addx(len1)
                len2stats.addx(len2)
                delta_hist.add(delta)

            if len1stats.count() > 0:
                print "\n\n"
                print "fstype:", fstype, "  ftype:", ftype
                print "len1 average: %f stddev: %f" % (len1stats.average(),
                                                       len1stats.stddev())
                print "len2 average: %f stddev: %f" % (len2stats.average(),
                                                       len2stats.stddev())
                print "delta average: %f" % delta_hist.average()
                print "delta histogram:"
                delta_hist.print_top(10)
예제 #22
0
 def read_xmlfile(self,fname):
     """Dispatch *fname* to fiwalk: parse it directly when it is a
     DFXML file, otherwise treat it as a disk image."""
     handle = open(fname,'rb')
     if fname.endswith(".xml"):
         fiwalk.fiwalk_using_sax(xmlfile=handle,callback=self.process)
     else:
         fiwalk.fiwalk_using_sax(imagefile=handle,callback=self.process)
예제 #23
0
 def ingest_dfxml(self, fname):
     """Feed every allocated file object from the DFXML file *fname*
     through self.process_fi."""
     fiwalk.fiwalk_using_sax(callback=self.process_fi,
                             flags=fiwalk.ALLOC_ONLY,
                             xmlfile=open(fname, 'rb'))
        sys.stdout.write("Done.\n")
        sys.stdout.write("Creating copy of byte-runs-esque table for indexing...")
    #There isn't currently an index for the byte runs in TSK.
    outcur.execute("""
      CREATE TABLE indexed_tsk_file_layout AS
      SELECT
        *,
        byte_start + byte_len AS byte_end
      FROM
        tsk.tsk_file_layout
      ORDER BY
        byte_start, byte_end, obj_id;
    """)
    outcur.execute("CREATE INDEX itfl_start ON indexed_tsk_file_layout(byte_start);")
    outcur.execute("CREATE INDEX itfl_end ON indexed_tsk_file_layout(byte_end);")
    if args.verbose:
        sys.stdout.write("Done.\n")
    sys.stdout.flush()

    #Process DFXML
    with open(args.dfxml, "rb") as xmlfh:
        fiwalk.fiwalk_using_sax(xmlfile=xmlfh, callback=proc_dfxml)
    if args.verbose:
        sys.stdout.write("Done.\n")

    #Cleanup
    outconn.commit()
    outcur.execute("DETACH DATABASE tsk;")
    outcur.close()
    tskcur.close()
예제 #25
0
          fi.filename(),
          fi.mtime(), fi.atime(), fi.ctime(), fi.crtime()
        ])))
        outfile = open(outfilename, "wb")
        outfile.write(fi.contents())
        outfile.close()
        if hivexml_command:
            command_string = hivexml_command + " " + outfilename + " >" + outfilename+".regxml" + " 2>" + outfilename + ".err.log"
            sysrc = os.system(command_string)
            if sysrc:
                sys.stderr.write("Error, see err.log: " + command_string + "\n")

if __name__=="__main__":
    # NOTE(review): 'global' at module level is a no-op; these lines only
    # signal that hivexml_command / imageabspath are read elsewhere
    # (by proc_dfxml, presumably) — verify against that callback.
    global hivexml_command
    global imageabspath

    parser = argparse.ArgumentParser(description="Find registry files in imagefile and dump hives to files in pwd in the order they're encountered, with a manifest printed to stdout.")
    parser.add_argument("-x", "--xml", dest="dfxml_file_name", help="Already-created DFXML file for imagefile")
    parser.add_argument("--hivexml", dest="hivexml_command", action="store_const", const="hivexml", default="",  help="Run hivexml command on each hive, producing output at <hive>.regxml, stderr at <hive>.err.log")
    parser.add_argument("imagefilename", help="Image file")
    args = parser.parse_args()

    hivexml_command = args.hivexml_command

    # Reuse an existing DFXML file when supplied; otherwise fiwalk runs
    # on the image itself (xmlfile=None).
    xmlfh = None
    if args.dfxml_file_name != None:
        xmlfh = open(args.dfxml_file_name, "rb")
    imageabspath = os.path.abspath(args.imagefilename)

    # NOTE(review): the image is opened in text mode "r"; "rb" looks
    # intended for binary image data — confirm against fiwalk_using_sax.
    fiwalk.fiwalk_using_sax(imagefile=open(imageabspath, "r"), xmlfile=xmlfh, callback=proc_dfxml)
예제 #26
0
def process_featurefile(args, report, featurefile):
    """Annotate one bulk_extractor feature file with the file each
    feature was found in.

    Reads *featurefile* from the bulk_extractor *report*, maps every
    feature offset to a DFXML file object (walking an XML file when one
    is available, otherwise running fiwalk on the image), and writes
    "annotated_<featurefile>" into args.outdir followed by a summary
    block.

    Raises RuntimeError when the output file already exists or a
    feature line cannot be parsed.
    """
    # Counters for the summary report
    global file_count
    features = featuredb()
    unallocated_count = 0
    feature_count = 0
    features_compressed = 0
    located_count = 0
    unicode_encode_errors = 0
    unicode_decode_errors = 0
    file_count = 0

    ofn = os.path.join(args.outdir, ("annotated_" + featurefile))
    if os.path.exists(ofn):
        raise RuntimeError(ofn + " exists")
    of = open(ofn, "wb")

    # First read the feature files
    print("Adding features from " + featurefile)
    try:
        linenumber = 0
        for line in report.open(featurefile, mode='rb'):
            # Read the file in binary and convert to unicode if possible
            linenumber += 1
            if bulk_extractor_reader.is_comment_line(line):
                continue
            try:
                # line[0:-1] strips the trailing newline.
                fset = features.add_featurefile_line(line[0:-1])
                feature_count += 1
                if (b"ZIP" in fset[0]) or (b"HIBER" in fset[0]):
                    features_compressed += 1
                del fset
            except ValueError:
                raise RuntimeError(
                    "Line {} in feature file {} is invalid: {}".format(
                        linenumber, featurefile, line))
    except IOError:
        # BUG FIX: the original referenced an undefined name 'fn' here,
        # raising NameError instead of reporting the real failure.
        print("Error: Failed to open feature file '%s'" % featurefile)
        exit(1)

    if args.debug:
        print('')
        features.print_debug()

    # feature2fi maps each feature to the file in which it was found
    feature2fi = {}

    ################################################################
    # If we got features in the featuredb, find out the file that each one
    # came from by scanning all of the files and, for each byte run,
    # indicating the features that are within the byte run.
    # (A stray, misspelled 'global filecount' no-op was removed here.)
    if features.count() > 0:
        def process(fi):
            # Callback per DFXML fileobject: attribute every feature whose
            # offset falls inside one of this file's byte runs to this file.
            global file_count
            file_count += 1
            if args.verbose or args.debug:
                print("%d %s (%d fragments)" %
                      (file_count, fi.filename(), fi.fragments()))
            for run in fi.byte_runs():
                for (offset, fset) in features.search(run):
                    if args.debug:
                        print("  run={} offset={} fset={} ".format(
                            run, offset, fset))
                    feature2fi[findex(
                        fset
                    )] = fi  # for each of those features, note that it is in this file
            if file_count % 1000 == 0:
                print("Processed %d fileobjects in DFXML file" % file_count)

        # Prefer an explicit XML file; otherwise look for one next to the
        # image; as a last resort run fiwalk on the image itself.
        xmlfile = None
        if args.xmlfile:
            xmlfile = args.xmlfile
        else:
            if args.imagefile:
                imagefile = args.imagefile
            else:
                imagefile = report.imagefile()
            # See if there is an xmlfile
            (root, ext) = os.path.splitext(imagefile)
            possible_xmlfile = root + ".xml"
            if os.path.exists(possible_xmlfile):
                xmlfile = possible_xmlfile
        if xmlfile:
            print("Using XML file " + xmlfile)
            fiwalk.fiwalk_using_sax(xmlfile=open(xmlfile, 'rb'),
                                    callback=process)
        else:
            print("Running fiwalk on " + imagefile)
            fiwalk.fiwalk_using_sax(imagefile=open(imagefile, 'rb'),
                                    callback=process)
    else:
        print("No features found; copying feature file")
    ################################################################

    print("Generating output...")

    # Now print all of the features
    if args.terse:
        of.write(b"# Position\tFeature\tFilename\n")
    else:
        of.write(b"# Position\tFeature\tContext\tFilename\tFile MD5\n")
    for (offset, fset) in features:
        try:
            of.write(fset[0])  # pos
            of.write(b"\t")
            of.write(fset[1])  # feature
            of.write(b"\t")
            try:
                if not args.terse:
                    of.write(fset[2])  # context
            except IndexError:
                pass  # no context
            try:
                fi = feature2fi[findex(fset)]
                of.write(b"\t")
                if fi.filename(): of.write(fi.filename().encode('utf-8'))
                if args.debug:
                    print("pos=", offset, "feature=", fset[1], "fi=", fi,
                          "fi.filename=", fi.filename())
                if not args.terse:
                    of.write(b"\t")
                    if fi.md5(): of.write(fi.md5().encode('utf-8'))
                located_count += 1
            except KeyError:
                unallocated_count += 1
                pass  # cannot locate
            of.write(b"\n")
        except UnicodeEncodeError:
            unicode_encode_errors += 1
            of.write(b"\n")
        except UnicodeDecodeError:
            unicode_decode_errors += 1
            of.write(b"\n")

    # stop the timer used to calculate the total run time
    # NOTE(review): t0 is expected to be a module-level start time set by
    # the caller — confirm it is defined before this function runs.
    t1 = time.time()

    # Summary report
    for (title, value) in [
        ["# Total features input: {}", feature_count],
        ["# Total features located to files: {}", located_count],
        ["# Total features in unallocated space: {}", unallocated_count],
        ["# Total features in compressed regions: {}", features_compressed],
        ["# Unicode Encode Errors: {}", unicode_encode_errors],
        ["# Unicode Decode Errors: {}", unicode_decode_errors],
        ["# Total processing time: {:.2} seconds", t1 - t0]
    ]:
        of.write((title + "\n").format(value).encode('utf-8'))
    # FIX: flush and close the annotated output file (was leaked).
    of.close()
예제 #27
0
#!/usr/bin/python
"""Usage: igrep imagefile.iso string ...

Reports the files in which files have the string.
"""
import fiwalk,dfxml

if __name__=="__main__":
    import sys

    from optparse import OptionParser
    parser = OptionParser()
    parser.usage = '%prog [options] image.iso  s1'
    parser.add_option("-d","--debug",help="debug",action="store_true")
    (options,args) = parser.parse_args()

    if len(args)!=2:
        parser.print_help()
        sys.exit(1)

    (imagefn,data) = args
    # fi.contents() yields raw bytes under Python 3, so encode the search
    # string once here rather than per-file.
    needle = data.encode('utf-8')

    def process(fi):
        """Report the file and offset if the search string occurs in it."""
        # find() returns -1 when absent; the original tested 'offset>0',
        # which also missed a match at the very start of a file (offset 0).
        offset = fi.contents().find(needle)
        if offset >= 0:
            print("%s (offset=%d)" % (fi.filename(), offset))

    # Open the disk image in binary mode and close it deterministically.
    with open(imagefn, 'rb') as imagefile:
        fiwalk.fiwalk_using_sax(imagefile=imagefile, callback=process)
예제 #28
0
 def bcProcessDfxmlFileUsingSax(self, dfxmlfile):
     """Parse the DFXML file with SAX, invoking self.cb for each fileobject."""
     # Context manager closes the XML handle even if parsing raises;
     # the original leaked the descriptor.
     with open(dfxmlfile, 'rb') as xmlfh:
         fiwalk.fiwalk_using_sax(xmlfile=xmlfh, callback=self.cb)
예제 #29
0
 def read_xmlfile(self,fname):
     """Read the file map from an existing DFXML file.

     Each fileobject is handed to self.process via the SAX callback.
     """
     print("Reading file map from XML file {}".format(fname))
     # Close the handle deterministically instead of leaking it.
     with open(fname, 'rb') as xmlfh:
         fiwalk.fiwalk_using_sax(xmlfile=xmlfh, callback=self.process)
예제 #30
0
파일: ireport.py 프로젝트: grayed/dfxml
def process_files(fn):
    """Scan a disk image or DFXML file and print extension/fragmentation stats.

    fn -- path to a DFXML file (name ending in 'xml') or to a disk image.
    """
    drive_files = {}                         # index of drives
    all_parts  = []
    all_files = []
    files_by_md5 = {}           # a dictionary of sets of fiobject, indexed by md5
    extension_len_histogram = histogram2d()
    extension_fragments_histogram = histogram2d()
    partition_histogram = histogram2d()

    def cb(fi):
        """SAX callback: accumulate per-extension and per-partition histograms."""
        # add the md5 to the set
        if fi.is_file() and fi.filesize():
            # BUG FIX: dict.get() does not insert its default, so the original
            # 'files_by_md5.get(fi.md5,set()).add(fi)' discarded every set it
            # built (and keyed on the unbound method fi.md5, not the digest).
            # setdefault() inserts-and-returns the set.
            files_by_md5.setdefault(fi.md5(), set()).add(fi)
            ext = fi.ext()
            if not ext: print(fi.meta_type(),fi)
            extension_len_histogram.add(ext,fi.filesize())
            extension_fragments_histogram.add(ext,fi.fragments())
            partition_histogram.add(fi.partition(),fi.filesize())

    # A name ending in 'xml' is pre-generated DFXML; anything else is a disk
    # image for fiwalk to scan.  The context manager closes the handle.
    with open(fn, 'rb') as fh:
        if fn.endswith('xml'):
            fiwalk.fiwalk_using_sax(xmlfile=fh, callback=cb)
        else:
            fiwalk.fiwalk_using_sax(imagefile=fh, callback=cb)

    #
    # Typeset the information
    #

    tab = ttable()
    tab.header     = "File extension popularity and average size (suppressing 0-len files)"
    tab.col_headings = [['Ext','Count','Average Size','Max','Std Dev']]
    tab.omit_row = [[0,'']]
    extension_len_histogram.statcol = ['iaverage','maxx','istddev']
    print(extension_len_histogram.typeset(tab=tab))

    #
    # Information about fragmentation patterns
    #
    tab = ttable()
    tab.header="Fragmentation pattern by file system and file type:"
    tab.col_headings = [['Ext','Count','Average Size','Max','Std Dev']]
    tab.omit_row = [[0,'']]
    extension_fragments_histogram.statcol = ['iaverage','maxx','istddev']
    print(extension_fragments_histogram.typeset(tab=tab))
    exit(0)

    # NOTE(review): everything below this exit(0) is unreachable legacy
    # analysis code, kept for reference.  It references module-level helpers
    # (fstypes, statbag, histogram, fragmentation_table, table, fstypeh)
    # assumed to be defined elsewhere in the original project.
    for fstype in fstypes:
        for ftype in ['jpg','pdf','doc','txt']:
            len1stats = statbag()
            len2stats = statbag()
            delta_hist = histogram()
            # Raw string avoids the invalid '\-' escape warning on Python 3.
            delta_re = re.compile(r"(\d+)\-?(\d+)? ?(\d+)\-?(\d+)?")
            # BUG FIX: fragments is called as a method elsewhere in this file
            # (fi.fragments()), so the original 'f.fragments==2' compared a
            # bound method to an int and was always False.
            for i in filter((lambda f: f.ext()==ftype and f.fragments()==2),all_files):
                runs = False
                if(hasattr(i,'block_runs')): runs = i.block_runs
                if(hasattr(i,'sector_runs')): runs = i.sector_runs
                if not runs: continue
                m = delta_re.search(runs)
                r = []
                for j in range(1,5):
                    try:
                        r.append(int(m.group(j)))
                    except TypeError:
                        r.append(int(m.group(j-1)))

                len1 = r[1] - r[0] + 1
                len2 = r[3] - r[2] + 1
                delta = r[2]-r[1]

                len1stats.addx(len1)
                len2stats.addx(len2)
                delta_hist.add(delta)

            if len1stats.count()>0:
                print("\n\n")
                print("fstype:",fstype,"  ftype:",ftype)
                print("len1 average: %f stddev: %f" % (len1stats.average(),len1stats.stddev()))
                print("len2 average: %f stddev: %f" % (len2stats.average(),len2stats.stddev()))
                print("delta average: %f" % delta_hist.average())
                print("delta histogram:")
                delta_hist.print_top(10)


    exit(0)


    print("Partition histogram:")
    partition_histogram.print_top(n=100)
    print("Counts by extension:")
    extension_len_histogram.print_top(n=100)
    print("Fragments by extension:")
    extension_fragments_histogram.print_top(n=100)

    exit(0)
    for fstype in fstypes:
        if fstype=='(unrecognized)': continue
        print(fstype,"Partitions:")

        def isfstype(x): return x.fstype==fstype
        these_parts = filter(isfstype,all_parts)
        these_files = []
        for part in these_parts:
            these_files.extend(part.files)
        print(fragmentation_table(these_files))


    exit(0)

    sys.exit(0)


    #
    # Typeset information about file extensions
    #
    hist_exts = histogram2d()
    hist_exts.topn = 20
    for i in all_files:
        if i.size>0 and i.fragments>0: hist_exts.add(i.ext(),i.size)
    tab = table()
    tab.header     = "File extension popularity and average size (suppressing 0-len files)"
    tab.col_headings = ['Ext','Count','Average Size','Max','Std Dev']
    tab.omit_row = [[0,'']]
    hist_exts.statcol = ['iaverage','maxx','istddev']
    print(hist_exts.typeset(t=tab))

    hist_exts = histogram2d()
    hist_exts.topn = 20
    for i in all_files:
        if i.fragments>0: hist_exts.add(i.ext(),i.fragments)
    tab = table()
    tab.header     = "Fragmentation by file extension (suppressing files with 0 fragments)"
    tab.col_headings = ['Ext','Count','Avg Fragments','Max','Std Dev']
    tab.omit_row = [[0,'']]
    hist_exts.statcol = ['average','maxx','stddev']
    print(hist_exts.typeset(t=tab))

    print("===========================")


    #
    # Typeset the File Systems on Drives table
    #

    tab = table()
    tab.header     = "File Systems on Drives"
    tab.col_headings = ["FS Type","Drives","MBytes"]
    tab.col_totals = [1,2]
    fstypeh.statcol = 'sumx'
    print(fstypeh.typeset(t=tab))

    #
    # Typeset overall fragmentation stats
    #

    print(fragmentation_table(all_files))
예제 #31
0
import fiwalk,math

# Running totals for .txt file sizes: sum, sum of squares, and count.
total = 0
total2 = 0
count = 0

def func(fi):
    """Accumulate size statistics for every file with a 'txt' extension."""
    global total,total2,count
    if fi.ext()=='txt':
        total += fi.filesize()
        total2 += fi.filesize() ** 2
        count += 1

# Open the image in binary mode and close it deterministically
# (the original text-mode open() is a Python 2 leftover).
with open("small.dmg", "rb") as imagefile:
    fiwalk.fiwalk_using_sax(imagefile=imagefile, callback=func)

# Guard the division: an image with no .txt files previously crashed
# with ZeroDivisionError.  print statements converted to Python 3 calls.
if count == 0:
    print("no .txt files found")
else:
    print("count=", count)
    print("average=", total/count)
    print("stddev=", math.sqrt(total2/count - (total/count)**2))

예제 #32
0
if __name__ == "__main__":
    import sys
    from optparse import OptionParser
    from sys import stdout
    parser = OptionParser()
    parser.usage = '%prog [options] (xmlfile or imagefile)'
    (options, args) = parser.parse_args()

    if not args:
        parser.print_usage()
        exit(1)

    sizes = []   # filesize of every fileobject seen
    dates = {}   # date -> number of timestamp events on that date

    def callback(fi):
        """Record the size and every timestamp of one fileobject."""
        sizes.append(fi.filesize())
        # BUG FIX: dict.iteritems() does not exist in Python 3 (the rest of
        # this script already uses Python 3 print()); items() is equivalent.
        for (tag, val) in fi.times().items():
            date = val.datetime()
            dates[date] = dates.get(date, 0) + 1

    fn = args[0]
    # A .xml argument is pre-generated DFXML; anything else is a disk image.
    # Binary mode and a context manager replace the leaked text-mode handle.
    with open(fn, 'rb') as fh:
        if fn.endswith(".xml"):
            fiwalk.fiwalk_using_sax(xmlfile=fh, callback=callback)
        else:
            fiwalk.fiwalk_using_sax(imagefile=fh, callback=callback)

    print("Here is the dates array:")
    for d in sorted(dates.keys()):
        print("{}   {}".format(d, dates[d]))
예제 #33
0
 def process(self,fname):
     """Run the ALLOC_ONLY fiwalk SAX scan over fname (DFXML or disk image)."""
     self.current_fname = fname
     # BUG FIX: the original opened the undefined name 'infile' instead of the
     # 'fname' argument, raising NameError on every call.  The context manager
     # also closes the handle, which was previously leaked.
     with open(fname, 'rb') as fh:
         if fname.endswith(".xml"):
             fiwalk.fiwalk_using_sax(xmlfile=fh, flags=fiwalk.ALLOC_ONLY, callback=self.process_fi)
         else:
             fiwalk.fiwalk_using_sax(imagefile=fh, flags=fiwalk.ALLOC_ONLY, callback=self.process_fi)
예제 #34
0
 def read_xmlfile(self, fname):
     """Read the file map from the DFXML file fname via the SAX callback."""
     print("Reading file map from XML file {}".format(fname))
     # Close the handle deterministically instead of leaking it.
     with open(fname, 'rb') as xmlfh:
         fiwalk.fiwalk_using_sax(xmlfile=xmlfh, callback=self.process)
예제 #35
0
    parser.usage = '%prog [options] imagefile zipfile [x1 x2 x3]\nFind files x1, x2, x3 ... in imagefile and write to zipfile'
    (options, args) = parser.parse_args()

    # Require at least: imagefile, zipfile, and one target filename.
    if len(args) < 3:
        parser.print_help()
        exit(1)

    imagefilename = args[0]
    xmlfilename = options.xmlfilename
    # xmlfh stays None unless a pre-generated DFXML file was supplied with -x.
    xmlfh = None
    if xmlfilename != None:
        xmlfh = open(xmlfilename, "r")
    zipfilename = args[1]
    # Case-insensitive matching: targets and basenames are both lowercased.
    targets = set([fn.lower() for fn in args[2:]])
    zfile = zipfile.ZipFile(zipfilename, "w", allowZip64=True)

    def proc(fi):
        # SAX callback: copy fi into the zip if its basename is a target.
        basename = os.path.basename(fi.filename()).lower()
        if basename in targets:
            # Preserve the file's modification time in the zip entry.
            info = zipfile.ZipInfo(
                fi.filename(),
                datetime.datetime.fromtimestamp(
                    fi.mtime().timestamp()).utctimetuple())
            info.internal_attr = 1
            info.external_attr = 2175008768  # specifies mode 0644
            zfile.writestr(info, fi.contents())

    # NOTE(review): zfile is never explicitly closed in this fragment —
    # confirm the zip central directory is written before process exit.
    fiwalk.fiwalk_using_sax(imagefile=open(imagefilename),
                            xmlfile=xmlfh,
                            callback=proc)
예제 #36
0
 def bcProcessDfxmlFileUsingSax(self, dfxmlfile):
     """Parse the DFXML file with SAX, invoking self.cb for each fileobject."""
     # Context manager closes the XML handle even if parsing raises;
     # the original leaked the descriptor.
     with open(dfxmlfile, 'rb') as xmlfh:
         fiwalk.fiwalk_using_sax(xmlfile=xmlfh, callback=self.cb)
예제 #37
0
파일: imap.py 프로젝트: Acidburn0zzz/dfxml
    # Require the disk image as a positional argument.
    if len(args)<1:
        parser.print_help()
        sys.exit(1)

    imagefile = open(args[0],"r")
    annotated_runs = []
    # NOTE(review): 'fileobjects' is not defined in this fragment — this debug
    # line would raise NameError if options.debug is set; verify upstream.
    if options.debug: print("Read %d file objects from %s" % (len(fileobjects),imagefile.name))

    def cb(fi):
        # SAX callback: record every byte run, tagged with its fragment number.
        if options.debug: print("Read "+str(fi))
        fragment_num = 1
        for run in fi.byte_runs():
            annotated_runs.append((run.img_offset,run,fragment_num,fi))
            fragment_num += 1
    fiwalk.fiwalk_using_sax(imagefile=imagefile,callback=cb)

    next_sector = 0

    # Walk the runs in image-offset order and report each fragment.
    for (ip,run,fragment_num,fi) in sorted(annotated_runs):
        extra = ""
        fragment = ""
        # NOTE(review): under Python 3, img_offset/512 is a float — the int()
        # cast on the next line suggests integer sectors were intended; confirm.
        start_sector = run.img_offset/512
        sector_count = int(run.bytes/512)
        partial        = run.bytes % 512

        # "***" flags runs belonging to unallocated (deleted) files.
        if not fi.allocated():
            print("***")

        if not fi.file_present():       # it's not here!
            continue 
예제 #38
0
import time
# The original fragment used fiwalk without importing it (NameError at the
# fiwalk_using_sax call); import it explicitly.
import fiwalk

if __name__=="__main__":
    import sys
    from optparse import OptionParser
    from sys import stdout
    parser = OptionParser()
    parser.usage = '%prog [options] xmlfile '
    (options,args) = parser.parse_args()

    sizes = []   # filesize of every fileobject seen
    dates = {}   # date -> number of timestamp events on that date
    def callback(fi):
        """Record the size and every timestamp of one fileobject."""
        sizes.append(fi.filesize())
        # BUG FIX: dict.iteritems() does not exist in Python 3; items() is
        # the equivalent (the rest of this script is already Python 3).
        for (tag,val) in fi.times().items():
            date = val.datetime()
            dates[date] = dates.get(date,0)+1

    # Binary mode plus a context manager replaces the leaked text handle.
    with open(args[0], "rb") as xmlfh:
        fiwalk.fiwalk_using_sax(xmlfile=xmlfh, callback=callback)
    try:
        import pylab
        pylab.grid()
        # BUG FIX: the original passed the undefined name 'times'; the data
        # collected by the callback lives in 'sizes'.
        pylab.hist(sizes,100)
        pylab.show()
    except ImportError:
        print("pylab not installed.")
        print("Date\tActivity Count:")
        for date in sorted(dates.keys()):
            print("%s\t%d" % (date,dates[date]))

예제 #39
0
 def ingest_dfxml(self,fname):
     """Feed allocated-only fileobjects from the DFXML file to self.process_fi."""
     # Context manager closes the handle; the original leaked it.
     with open(fname, 'rb') as xmlfh:
         fiwalk.fiwalk_using_sax(xmlfile=xmlfh, flags=fiwalk.ALLOC_ONLY, callback=self.process_fi)
예제 #40
0
if __name__=="__main__":
    from optparse import OptionParser
    parser = OptionParser()
    parser.add_option("-x", "--xml", dest="xmlfilename", help="Already-created DFXML file for imagefile")
    parser.usage = '%prog [options] imagefile zipfile [x1 x2 x3]\nFind files x1, x2, x3 ... in imagefile and write to zipfile'
    (options,args) = parser.parse_args()

    # Require at least: imagefile, zipfile, and one target filename.
    if len(args)<3:
        parser.print_help()
        exit(1)

    imagefilename = args[0]
    xmlfilename = options.xmlfilename
    xmlfh = None
    if xmlfilename is not None:
        xmlfh = open(xmlfilename, "r")
    zipfilename = args[1]
    # Case-insensitive matching: targets and basenames are both lowercased.
    targets = set(fn.lower() for fn in args[2:])
    zfile = zipfile.ZipFile(zipfilename,"w",allowZip64=True)

    def proc(fi):
        """SAX callback: copy fi into the zip if its basename is a target."""
        basename = os.path.basename(fi.filename()).lower()
        if basename in targets:
            # Preserve the file's modification time in the zip entry.
            info = zipfile.ZipInfo(fi.filename(),datetime.datetime.fromtimestamp(fi.mtime().timestamp()).utctimetuple())
            info.internal_attr = 1
            info.external_attr = 2175008768 # specifies mode 0644
            zfile.writestr(info,fi.contents())

    # Open the disk image in binary mode (the original text-mode open is a
    # Python 2 leftover).
    fiwalk.fiwalk_using_sax(imagefile=open(imagefilename, 'rb'), xmlfile=xmlfh, callback=proc)
    # BUG FIX: close the archive so the zip central directory is written out
    # instead of depending on interpreter shutdown.
    zfile.close()
    
예제 #41
0
    from optparse import OptionParser
    from subprocess import Popen,PIPE
    global options,xml_out
    from glob import glob

    parser = OptionParser()
    parser.usage = "%prog [options] imagefile"
    parser.add_option("-d","--debug",help="prints debugging info",dest="debug",action="store_true")
    parser.add_option("-c","--commit",help="Really do the redaction",action="store_true")
    parser.add_option("--all",help="Do all",action="store_true")
    (options,args) = parser.parse_args()

    # First read all of the redaction files
    # NOTE(review): Python 2-only syntax below (print statements, 'raise E, msg');
    # this fragment predates the Python 3 port and is kept byte-for-byte.
    # Collect every filename mentioned by any *redacted.xml* file.
    for fn in glob("*redacted.xml*"):
        try:
            fiwalk.fiwalk_using_sax(xmlfile=open(fn),callback=lambda fi:redact_filenames.add(fi.filename()))
        except xml.parsers.expat.ExpatError:
            print "Invalid XML file:",fn
    print "number of filenames in redaction XML:",len(redact_filenames)

    if options.all:
        # Convert every .aff image to .raw via afconvert, unless already done.
        for fn in glob("*.aff"):
            raw = fn.replace(".aff",".raw")
            if not os.path.exists(raw):
                print "%s --> %s" % (fn,raw)
                if call(['afconvert','-e','raw',fn])!=0:
                    raise RuntimeError,"afconvert of %s failed" % fn
        fns = glob("*.raw")
    else:
        fns = args
    
예제 #42
0
def main():
    """Index every fileobject of IMAGE into Solr, then commit the batch."""
    # BUG FIX: the builtin file() was removed in Python 3; use open() in
    # binary mode, and close the handle via a context manager.
    with open(IMAGE, 'rb') as imagefile:
        fiwalk.fiwalk_using_sax(imagefile=imagefile, callback=index_fobj)
    SOLR.commit()
예제 #43
0
파일: iredact.py 프로젝트: geoffblack/dfxml
            self.image_file.close()
        # Close the companion XML file too, if it was opened and is still open.
        if self.xml_file and self.xml_file.closed == False:
            print "closing file %s" % self.xml_file.name
            self.xml_file.close()

if __name__=="__main__":
    import sys,time
    from optparse import OptionParser
    from subprocess import Popen,PIPE
    global options

    parser = OptionParser()
    parser.usage = "%prog [options] config-file"
    parser.add_option("-d","--debug",help="prints debugging info",dest="debug")
    (options,args) = parser.parse_args()

    # start the timer used to report the total run time
    t0 = time.time()
    # Read the redaction configuration file
    rc = RedactConfig(args[0])

    if not rc.imagefile:
        # Python 3 fix: print is a function, not a statement.
        print("Error: a filename must be specified in the redaction config file")
        sys.exit(1)

    fiwalk.fiwalk_using_sax(imagefile=rc.imagefile,xmlfile=rc.xmlfile,callback=rc.process_file)
    t1 = time.time()

    rc.close_files()

    print("Time to run: %d seconds" % (t1-t0))
예제 #44
0
def process_featurefile(args,report,featurefile):
    """Annotate one bulk_extractor feature file with the file each feature lies in.

    Reads `featurefile` out of the bulk_extractor `report`, maps every
    feature's byte offset onto the fileobjects of the image's DFXML, and
    writes 'annotated_<featurefile>' into args.outdir.  Raises RuntimeError
    if the output file already exists or a feature line cannot be parsed.
    """
    # Counters for the summary report
    global file_count
    features = featuredb()
    unallocated_count = 0
    feature_count = 0
    features_compressed = 0
    located_count = 0
    unicode_encode_errors = 0
    unicode_decode_errors = 0
    file_count = 0

    ofn = os.path.join(args.outdir,("annotated_" + featurefile ))
    if os.path.exists(ofn):
        raise RuntimeError(ofn+" exists")
    of = open(ofn,"wb")

    # First read the feature files
    print("Adding features from "+featurefile)
    try:
        linenumber = 0
        for line in report.open(featurefile,mode='rb'):
            # Read the file in binary and convert to unicode if possible
            linenumber += 1
            if bulk_extractor_reader.is_comment_line(line):
                continue
            try:
                fset = features.add_featurefile_line(line[0:-1])
                feature_count += 1
                if (b"ZIP" in fset[0]) or (b"HIBER" in fset[0]):
                    features_compressed += 1
                del fset
            except ValueError:
                raise RuntimeError("Line {} in feature file {} is invalid: {}".format(linenumber,featurefile,line))
    except IOError:
        # BUG FIX: the original formatted the undefined name 'fn' here, which
        # masked the real failure behind a NameError.
        print("Error: Failed to open feature file '%s'" % featurefile)
        exit(1)

    if args.debug:
        print('')
        features.print_debug()

    # feature2fi maps each feature to the fileobject in which it was found
    feature2fi = {}

    ################################################################
    # If we got features in the featuredb, find out the file that each one came from
    # by scanning all of the files and, for each byte run, indicating the features
    # that are within the byte run
    # (The original declared 'global filecount' here — a typo for file_count,
    # which is already declared global above — so that no-op was dropped.)
    if features.count()>0:
        def process(fi):
            global file_count
            file_count += 1
            if args.verbose or args.debug:
                print("%d %s (%d fragments)" % (file_count,fi.filename(),fi.fragments()))
            for run in fi.byte_runs():
                for (offset,fset) in features.search(run):
                    if args.debug:
                        print("  run={} offset={} fset={} ".format(run,offset,fset))
                    feature2fi[findex(fset)] = fi    # for each of those features, note that it is in this file
            if file_count%1000==0:
                print("Processed %d fileobjects in DFXML file" % file_count)

        # Prefer an explicit DFXML file; otherwise take the image (from args
        # or the report) and look for a sibling .xml next to it.
        xmlfile = None
        if args.xmlfile:
            xmlfile = args.xmlfile
        else:
            if args.imagefile:
                imagefile = args.imagefile
            else:
                imagefile = report.imagefile()
            # See if there is an xmlfile
            (root,ext) = os.path.splitext(imagefile)
            possible_xmlfile = root+".xml"
            if os.path.exists(possible_xmlfile):
                xmlfile = possible_xmlfile
        if xmlfile:
            print("Using XML file "+xmlfile)
            fiwalk.fiwalk_using_sax(xmlfile=open(xmlfile,'rb'),callback=process)
        else:
            print("Running fiwalk on " + imagefile)
            fiwalk.fiwalk_using_sax(imagefile=open(imagefile,'rb'),callback=process)
    else:
        print("No features found; copying feature file")
    ################################################################

    print("Generating output...")

    # Now print all of the features
    if args.terse:
        of.write(b"# Position\tFeature\tFilename\n")
    else:
        of.write(b"# Position\tFeature\tContext\tFilename\tFile MD5\n")
    for (offset,fset) in features:
        try:
            of.write(fset[0]) # pos
            of.write(b"\t")
            of.write(fset[1]) # feature
            of.write(b"\t")
            try:
                if not args.terse:
                    of.write(fset[2]) # context
            except IndexError:
                pass            # no context
            try:
                fi = feature2fi[findex(fset)]
                of.write(b"\t")
                if fi.filename(): of.write(fi.filename().encode('utf-8'))
                if args.debug:
                    print("pos=",offset,"feature=",fset[1],"fi=",fi,"fi.filename=",fi.filename())
                if not args.terse:
                    of.write(b"\t")
                    if fi.md5(): of.write(fi.md5().encode('utf-8'))
                located_count += 1
            except KeyError:
                unallocated_count += 1
                pass            # cannot locate
            of.write(b"\n")
        except UnicodeEncodeError:
            unicode_encode_errors += 1
            of.write(b"\n")
        except UnicodeDecodeError:
            unicode_decode_errors += 1
            of.write(b"\n")

    # stop the timer used to calculate the total run time
    # NOTE(review): t0 is assumed to be a module-level start time set by the
    # caller — confirm before trusting the reported total.
    t1 = time.time()

    # Summary report
    for (title,value) in [["# Total features input: {}",feature_count],
                          ["# Total features located to files: {}",located_count],
                          ["# Total features in unallocated space: {}",unallocated_count],
                          ["# Total features in compressed regions: {}",features_compressed],
                          ["# Unicode Encode Errors: {}",unicode_encode_errors],
                          ["# Unicode Decode Errors: {}",unicode_decode_errors],
                          ["# Total processing time: {:.2} seconds",t1-t0]]:
        of.write((title+"\n").format(value).encode('utf-8'))
    # BUG FIX: close the annotated output so buffered bytes are flushed.
    of.close()
예제 #45
0
if __name__ == "__main__":
    import sys, time
    from optparse import OptionParser
    from subprocess import Popen, PIPE
    global options

    parser = OptionParser()
    parser.usage = "%prog [options] config-file"
    parser.add_option("-d",
                      "--debug",
                      help="prints debugging info",
                      dest="debug")
    (options, args) = parser.parse_args()

    # start the timer used to report the total run time
    t0 = time.time()
    # Read the redaction configuration file
    rc = RedactConfig(args[0])

    if not rc.imagefile:
        # Python 3 fix: print is a function, not a statement.
        print("Error: a filename must be specified in the redaction config file")
        sys.exit(1)

    fiwalk.fiwalk_using_sax(imagefile=rc.imagefile,
                            xmlfile=rc.xmlfile,
                            callback=rc.process_file)
    t1 = time.time()

    rc.close_files()

    print("Time to run: %d seconds" % (t1 - t0))
예제 #46
0
if __name__=="__main__":
    import sys
    from optparse import OptionParser
    from sys import stdout
    parser = OptionParser()
    parser.usage = '%prog [options] (xmlfile or imagefile)'
    (options,args) = parser.parse_args()

    if not args:
        parser.print_usage()
        exit(1)

    sizes = []   # filesize of every fileobject seen
    dates = {}   # date -> number of timestamp events on that date
    def callback(fi):
        """Record the size and every timestamp of one fileobject."""
        sizes.append(fi.filesize())
        # BUG FIX: dict.iteritems() does not exist in Python 3 (this script
        # already uses Python 3 print()); items() is equivalent.
        for (tag,val) in fi.times().items():
            date = val.datetime()
            dates[date] = dates.get(date,0)+1

    fn = args[0]
    # A .xml argument is pre-generated DFXML; anything else is a disk image.
    # Binary mode and a context manager replace the leaked text-mode handle.
    with open(fn, 'rb') as fh:
        if fn.endswith(".xml"):
            fiwalk.fiwalk_using_sax(xmlfile=fh,callback=callback)
        else:
            fiwalk.fiwalk_using_sax(imagefile=fh,callback=callback)

    print("Here is the dates array:")
    for d in sorted(dates.keys()):
        print("{}   {}".format(d,dates[d]))

예제 #47
0
def process_files(fn):
    # NOTE(review): Python 2-only variant of process_files (print statements,
    # tuple-parameter lambda); appears truncated at the snippet boundary and
    # is kept byte-for-byte.  A Python 3 version exists elsewhere in this
    # collection.
    drive_files = {}                         # index of drives
    all_parts  = []
    all_files = []
    files_by_md5 = {}           # a dictionary of sets of fiobject, indexed by md5
    extension_len_histogram = histogram2d()
    extension_fragments_histogram = histogram2d()
    partition_histogram = histogram2d()

    def cb(fi):
        # add the md5 to the set
        if fi.is_file() and fi.filesize():
            # NOTE(review): dict.get() does not store its default, so this set
            # is created and immediately discarded; setdefault(fi.md5(), set())
            # was probably intended — confirm against upstream.
            files_by_md5.get(fi.md5,set()).add(fi)
            ext = fi.ext()
            if not ext: print fi.meta_type(),fi
            extension_len_histogram.add(ext,fi.filesize())
            extension_fragments_histogram.add(ext,fi.fragments())
            partition_histogram.add(fi.partition(),fi.filesize())

    # A name ending in 'xml' is pre-generated DFXML; otherwise a disk image.
    if fn.endswith('xml'):
        fiwalk.fiwalk_using_sax(xmlfile=open(fn),callback=cb)
    else:
        fiwalk.fiwalk_using_sax(imagefile=open(fn),callback=cb)
    

    #
    # Typeset the information
    #

    tab = ttable()
    tab.header     = "File extension popularity and average size (suppressing 0-len files)"
    tab.col_headings = [['Ext','Count','Average Size','Max','Std Dev']]
    tab.omit_row = [[0,'']]
    extension_len_histogram.statcol = ['iaverage','maxx','istddev']
    print extension_len_histogram.typeset(tab=tab)

    #
    # Information about fragmentation patterns
    #
    tab = ttable()
    tab.header="Fragmentation pattern by file system and file type:"
    tab.col_headings = [['Ext','Count','Average Size','Max','Std Dev']]
    tab.omit_row = [[0,'']]
    extension_fragments_histogram.statcol = ['iaverage','maxx','istddev']
    print extension_fragments_histogram.typeset(tab=tab)
    exit(0)

    # NOTE(review): everything below is unreachable after exit(0); legacy
    # analysis code kept as-is.
    for fstype in fstypes:
        for ftype in ['jpg','pdf','doc','txt']:
            len1stats = statbag()
            len2stats = statbag()
            delta_hist = histogram()
            delta_re = re.compile("(\d+)\-?(\d+)? ?(\d+)\-?(\d+)?")
            for i in filter( (lambda(f): f.ext()==ftype and f.fragments==2),all_files):
                runs = False
                if(hasattr(i,'block_runs')): runs = i.block_runs
                if(hasattr(i,'sector_runs')): runs = i.sector_runs
                if not runs: continue
                m = delta_re.search(runs)
                r = []
                for j in range(1,5):
                    try:
                        r.append(int(m.group(j)))
                    except TypeError:
                        r.append(int(m.group(j-1)))

                len1 = r[1] - r[0] + 1
                len2 = r[3] - r[2] + 1
                delta = r[2]-r[1]
                
                len1stats.addx(len1)
                len2stats.addx(len2)
                delta_hist.add(delta)

            if len1stats.count()>0:
                print "\n\n"
                print "fstype:",fstype,"  ftype:",ftype
                print "len1 average: %f stddev: %f" % (len1stats.average(),len1stats.stddev())
                print "len2 average: %f stddev: %f" % (len2stats.average(),len2stats.stddev())
                print "delta average: %f" % delta_hist.average()
                print "delta histogram:"
                delta_hist.print_top(10)
예제 #48
0
                      "--debug",
                      help="prints debugging info",
                      dest="debug",
                      action="store_true")
    parser.add_option("-c",
                      "--commit",
                      help="Really do the redaction",
                      action="store_true")
    parser.add_option("--all", help="Do all", action="store_true")
    (options, args) = parser.parse_args()

    # First read all of the redaction files
    for fn in glob("*redacted.xml*"):
        try:
            fiwalk.fiwalk_using_sax(
                xmlfile=open(fn),
                callback=lambda fi: redact_filenames.add(fi.filename()))
        except xml.parsers.expat.ExpatError:
            print "Invalid XML file:", fn
    print "number of filenames in redaction XML:", len(redact_filenames)

    if options.all:
        for fn in glob("*.aff"):
            raw = fn.replace(".aff", ".raw")
            if not os.path.exists(raw):
                print "%s --> %s" % (fn, raw)
                if call(['afconvert', '-e', 'raw', fn]) != 0:
                    raise RuntimeError, "afconvert of %s failed" % fn
        fns = glob("*.raw")
    else:
        fns = args
예제 #49
0
  
    #output is to stdout
    outfile = sys.stdout

    #find partition information, blocksize and filesystem
    #1st partition has no. 1, to correspond to fiwalk output
    partitioncounter = 0
    # NOTE(review): these two writes go to 'f', while the per-line output
    # below goes to 'f_out' — and 'f' is also iterated for reading just
    # after being written to.  This fragment looks inconsistent; confirm
    # which handle is the report output upstream.
    f.write("********************************** PARTITIONS **********************************")
    f.write("\nNo\tBlocksize\tFilesystem\n")

    # Scan the DFXML text for block_size / ftype_str elements and emit one
    # row per partition.
    for line in f:
        if re.search("block_size", line):
            partitioncounter += 1
            f_out.write(str(partitioncounter))
            f_out.write("\t")
            f_out.write(re.split(">|<", line)[2])
        if re.search("ftype_str", line):
            f_out.write("\t\t")
            f_out.write(re.split(">|<", line)[2])
            f_out.write("\n")

    f_out.write("\n\n************************************* DATA *************************************\n")
    f_out.write("Partition\tFilename\tSize\tTimestamps\n")
    f.close()

    #re-open file for binary reading
    #file processing
    f = open(file_name, "rb")
    # NOTE(review): the next line was over-indented in the original (an
    # IndentationError as written); indentation normalized to match this
    # statement block — confirm against the upstream file.
    fiwalk.fiwalk_using_sax(xmlfile=f,callback=proc)