Example #1
    def add_inode(self, fd, offset, factories):
        """ We think we have a zip file here. """
        b = Zip.Buffer(fd=fd)[offset:]
        try:
            header = Zip.ZipFileHeader(b)
            size = int(header['uncompr_size'])
            compressed_length = int(header['compr_size'])

            ## Some zip programs seem to leave this at 0 - because it's
            ## already in the central directory. Unfortunately the
            ## carver currently does not look at the central directory,
            ## so we just pick a reasonable value:
            if compressed_length==0:
                compressed_length = 100*1024
                
            name = header['zip_path'].get_value()
            if len(name)==0 or invalid_filename.search(name):
                pyflaglog.log(pyflaglog.DEBUG, "Thought the name %r is invalid - skipping file" % name[:10])
                return 10

            header_offset = header['data'].buffer.offset
        except Exception:
            ## Anything that fails to parse here is not a valid zip
            ## file header - tell the carver to skip ahead:
            return 10

        new_inode = "%s|Z%s:%s" % (fd.inode, offset, compressed_length)
        self._add_inode(new_inode, size, name, fd, factories)
        return size
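
Note: Zip.ZipFileHeader above is PyFlag's own parser, but the fields it reads ('compr_size', 'uncompr_size', 'zip_path') come straight from the fixed 30-byte local file header defined in the PKZIP APPNOTE. A minimal sketch of the same parse using only the standard library - the function and field names here are illustrative, not PyFlag's API:

import struct

def parse_local_file_header(data, offset=0):
    """ Decode the fixed part of a zip local file header, or return
    None if the magic signature is not present at offset. """
    fixed = data[offset:offset + 30]
    if len(fixed) < 30:
        return None

    (sig, version, flags, method, mtime, mdate, crc32,
     compr_size, uncompr_size, name_len, extra_len) = struct.unpack(
         "<IHHHHHIIIHH", fixed)

    if sig != 0x04034b50:          ## "PK\x03\x04"
        return None

    return {'compression_method': method,
            'crc32': crc32,
            'compr_size': compr_size,
            'uncompr_size': uncompr_size,
            'zip_path': data[offset + 30:offset + 30 + name_len],
            ## The compressed stream starts after the variable parts:
            'data_offset': offset + 30 + name_len + extra_len}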
Example #2
class ZipFile(File):
    """ A file like object to read files from within zip files.

    We decompress the file to disk rather than into memory because
    the file may be exceptionally large.
    """
    specifier = 'Z'
    
    def __init__(self, case, fd, inode):
        File.__init__(self, case, fd, inode)

        ## Make sure our parent is cached:
        self.fd.cache()

        ## Parse out the inode - if the compressed length was provided
        ## we use it, otherwise we calculate it from the zip file
        ## header below.
        parts = inode.split('|')
        ourpart = parts[-1][1:]
        self.compressed_length = 0
        try:
            offset, size = ourpart.split(":")
            self.compressed_length = int(size)
            offset = int(offset)
        except ValueError:
            offset = int(ourpart)

        self.offset = offset
        ## Ensure that we can read the file header:
        b = Zip.Buffer(fd=fd)[offset:]
        self.header = Zip.ZipFileHeader(b)

        ## This is sometimes invalid and set to zero - should we query
        ## the db?
        self.size = int(self.header['uncompr_size'])
        
        if not self.compressed_length:
            self.compressed_length = int(self.header['compr_size'])
            
        self.type = int(self.header['compression_method'])

        ## Where does the data start?
        self.init()
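
Note: the inode string parsed here is the one add_inode() builds in the first example - the parent inode, a 'Z' specifier, the offset of the header, and optionally the compressed length. A quick round trip of the convention (the parent inode value is made up):

## Carver side - "<parent inode>|Z<offset>:<compressed length>":
new_inode = "%s|Z%s:%s" % ("Itest|K1", 4096, 51200)
print new_inode                    ## -> Itest|K1|Z4096:51200

## Reader side - take the last part, strip the 'Z' specifier, then
## split the offset from the length:
ourpart = new_inode.split('|')[-1][1:]
offset, size = ourpart.split(":")
print int(offset), int(size)       ## -> 4096 51200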
Example #3
    def __init__(self, reassembler):
        self.r = reassembler

        ## Try to load the central directory if possible: This may
        ## fail if the cd is fragmented. FIXME: be able to handle
        ## fragmentation at the CD.
        cd_x = self.r.get_point("Central_Directory")
        self.cds = []
        if cd_x:
            b = Buffer(self.r)[cd_x:]
            while 1:
                try:
                    cd = Zip.CDFileHeader(b)
                except RuntimeError:
                    print "Finished reading CD (%s items)" % len(self.cds)
                    break

                self.cds.append(cd)
                b = b[cd.size():]
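
Note: Zip.CDFileHeader wraps the 46-byte fixed part of a central directory record, and b = b[cd.size():] simply steps over the record plus its variable-length name, extra and comment fields. A stdlib sketch of one such step over a plain bytes buffer - the names are mine, only the layout comes from the zip specification:

import struct

CD_SIG = 0x02014b50                ## "PK\x01\x02"

def next_cd_record(data, offset):
    """ Decode one CD record, returning (fields, offset of the next). """
    (sig, ver_made, ver_need, flags, method, mtime, mdate, crc32,
     compr_size, uncompr_size, name_len, extra_len, comment_len,
     disk_no, int_attr, ext_attr, local_offset) = struct.unpack(
         "<IHHHHHHIIIHHHHHII", data[offset:offset + 46])

    if sig != CD_SIG:
        raise RuntimeError("No CD record at offset %s" % offset)

    fields = {'filename': data[offset + 46:offset + 46 + name_len],
              'compressed_size': compr_size,
              'uncompr_size': uncompr_size,
              'relative_offset_local_header': local_offset}
    return fields, offset + 46 + name_len + extra_len + comment_len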
Example #4
    def build_maps(self, index_file):
        hits = self.load_index(index_file)

        image_fd = open(self.args[0], 'rb')  ## the disk image is binary
        zip_files = {}

        for ecd_offset in hits['EndCentralDirectory']:
            ## Each EndCentralDirectory represents a new Zip file
            r = Carver.Reassembler(None)
            b = Buffer(image_fd)[ecd_offset:]
            ecd = Zip.EndCentralDirectory(b)
            print "End Central Directory at offset %s:" % (ecd_offset, )

            ## Find the CD:
            offset_of_cd = ecd['offset_of_cd'].get_value()

            ## Check if the cd is where we think it should be:
            possibles = []
            for x in hits['CDFileHeader']:
                if x == ecd_offset - ecd['size_of_cd'].get_value():
                    ## No fragmentation in CD:
                    print "No fragmentation in Central Directory at offset %s discovered... good!" % x
                    possibles = [
                        x,
                    ]
                    break

                if x % 512 == offset_of_cd % 512:
                    print "Possible Central Directory Starts at %s" % x
                    possibles.append(x)

            ## FIXME: this needs to be made to estimate the most likely
            ## possibility - we really have very little to go on here -
            ## how can we distinguish between two different CDs that occur
            ## in the same spot? I don't think it's very likely in reality
            ## because the CD sits at the end of the zip file, which will
            ## be of varying sizes.

            ## We probably should prefer the CD found at image offset
            ## of ecd - ecd['size_of_cd'] which will be the case if
            ## the CD is not fragmented.

            ## For now we go with the first possibility - if we found
            ## no candidate at all we cannot reconstruct this file:
            if not possibles:
                continue

            cd_image_offset = possibles[0]

            ## Identify the central directory:
            r.add_point(offset_of_cd, cd_image_offset, "Central_Directory")

            ## We can calculate the offset of ecd here:
            r.add_point(offset_of_cd + ecd['size_of_cd'].get_value(),
                        ecd_offset, "End_Central_Directory")

            ## The file end - this is used to stop the carver:
            r.add_point(
                offset_of_cd + ecd['size_of_cd'].get_value() + ecd.size(),
                ecd_offset + ecd.size(), "EOF")

            ## Read all entries in the CD and try to locate their
            ## corresponding ZipFileHeaders:
            for i in range(ecd['total_entries_in_cd_on_disk'].get_value()):
                b = Buffer(image_fd)[cd_image_offset:]
                cd = Zip.CDFileHeader(b)

                ## Now try to find the ZipFileHeader for this cd entry:
                fh_offset = cd['relative_offset_local_header'].get_value()

                for fh_image_offset in hits['ZipFileHeader']:
                    ## Apply the modulo rule:
                    if fh_image_offset % 512 == fh_offset % 512:
                        print "Possible File header at image offset %s" % fh_image_offset

                        b = Buffer(image_fd)[fh_image_offset:]
                        try:
                            fh = Zip.ZipFileHeader(b)
                        except Exception:
                            print "Oops - no File Header here... continuing"
                            continue

                        ## Is it the file we expect?
                        path = fh['zip_path'].get_value()
                        expected_path = cd['filename'].get_value()

                        ## Check the paths:
                        if path and expected_path and path != expected_path:
                            print "This ZipFileHeader is for %s, while we wanted %s" % (
                                path, expected_path)
                            continue

                        ## Check the expected lengths with the central directory:
                        cd_compr_size = cd['compressed_size'].get_value()
                        cd_uncompr_size = cd['uncompr_size'].get_value()

                        fh_compr_size = fh['compr_size'].get_value()
                        fh_uncompr_size = fh['uncompr_size'].get_value()

                        if cd_compr_size and fh_compr_size and cd_compr_size != fh_compr_size:
                            print "Compressed size does not match (%s - expected %s)" % (
                                fh_compr_size, cd_compr_size)
                            continue

                        if cd_uncompr_size and fh_uncompr_size and cd_uncompr_size != fh_uncompr_size:
                            print "Uncompressed size does not match (%s - expected %s)" % (
                                fh_uncompr_size, cd_uncompr_size)
                            continue
                            continue

                        print "Will use Zip File Header at %s." % (
                            fh_image_offset)

                        ## Identify point:
                        r.add_point(fh_offset, fh_image_offset,
                                    "File_%s" % path)

                ## Progress to the next file in the archive:
                cd_image_offset += cd.size()

            r.save_map("%s.map" % ecd_offset)
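
Note: the "modulo rule" applied twice above is a carving heuristic: filesystems fragment files only on sector boundaries, so if a zip structure claims to live at file offset f, its copy in the image must start at an image offset congruent to f modulo the sector size. A minimal sketch with the 512-byte sector size the code assumes:

SECTOR_SIZE = 512

def candidate_image_offsets(file_offset, hit_offsets,
                            sector_size=SECTOR_SIZE):
    """ Keep only the image offsets whose position within a sector
    matches the position the structure claims within the file. """
    return [x for x in hit_offsets
            if x % sector_size == file_offset % sector_size]

## A header claiming file offset 1038 sits 14 bytes into a sector, so
## only image offsets 14 bytes into a sector can match:
print candidate_image_offsets(1038, [526, 1038, 4110, 5000])
## -> [526, 1038, 4110]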
Example #5
    def decode_ecd_header(self, b, length_to_test):
        ecd = Zip.EndCentralDirectory(b)

        print "Found ECD %s" % ecd
        return ecd.size()
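
Note: the EndCentralDirectory record decoded here is the 22-byte (plus comment) structure that anchors the whole reconstruction in build_maps() above - it carries 'size_of_cd', 'offset_of_cd' and the entry count. Its fixed layout, sketched with the standard library (the names are mine):

import struct

def parse_ecd(data, offset=0):
    """ Decode the fixed part of an End of Central Directory record. """
    (sig, disk_no, cd_disk_no, entries_on_disk, total_entries,
     size_of_cd, offset_of_cd, comment_len) = struct.unpack(
         "<IHHHHIIH", data[offset:offset + 22])

    if sig != 0x06054b50:          ## "PK\x05\x06"
        raise RuntimeError("No ECD at offset %s" % offset)

    return {'total_entries_in_cd_on_disk': entries_on_disk,
            'size_of_cd': size_of_cd,
            'offset_of_cd': offset_of_cd,
            'size': 22 + comment_len}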
Example #6
    def decode_cd_file(self, b, length_to_test):
        cd = Zip.CDFileHeader(b)
        print "Found CD Header: %s" % cd['filename']

        return cd.size()
Example #7
    def decode_file(self, b, length_to_test):
        """ Attempts to decode and verify a ZipFileHeader """
        fh = Zip.ZipFileHeader(b)
        #print "Zip File Header @ offset %s (name %s) " % (b.offset, fh['zip_path'])

        ## The following is necessary because some Zip writers do not
        ## write the same information in both the ZipFileHeader and the
        ## CDFileHeader. FIXME: what do we do if the information is set
        ## in both but actually different? (This is a common way for
        ## malware to defeat email filters and virus scanners, as with
        ## zip bombs.)
        compression_method = fh['compression_method'].get_value()
        compressed_size = fh['compr_size'].get_value()
        uncompr_size = fh['uncompr_size'].get_value()
        crc32 = fh['crc32'].get_value()

        for cd in self.cds:
            if cd['filename'] == fh['zip_path']:
                ## Found the CD entry for our file, if any of the
                ## above parameters are not set in the ZipFileHeader,
                ## try to get them from the CD:
                if not compression_method:
                    compression_method = cd['compression'].get_value()

                if not compressed_size:
                    compressed_size = cd['compressed_size'].get_value()

                if not uncompr_size:
                    uncompr_size = cd['uncompr_size'].get_value()

                if not crc32:
                    crc32 = cd['crc-32'].get_value()

        ## Deflate:
        if compression_method == 8:
            dc = zlib.decompressobj(-15)
            crc = 0

            self.offset = b.offset + fh.size()
            self.r.seek(self.offset)

            total = 0

            to_read = compressed_size

            while to_read > 0:
                cdata = self.r.read(min(SECTOR_SIZE, to_read))
                #print "Read %s" % len(cdata)
                to_read -= len(cdata)
                data = dc.decompress(cdata)
                total += len(data)
                self.offset += len(cdata)
                crc = binascii.crc32(data, crc)

                ## Only test as much as was asked
                if self.offset > length_to_test: return length_to_test

            ## Finalise the data. The dummy byte pushes any remaining
            ## buffered output through older zlib versions before the
            ## flush (the same trick the stdlib zipfile module used):
            ex = dc.decompress('Z') + dc.flush()
            total += len(ex)
            crc = binascii.crc32(ex, crc)

            if total != uncompr_size:
                print "Total decompressed data: %s (%s)" % (total,
                                                            uncompr_size)
                raise IOError(
                    "Decompressed file does not have the expected length")

            ## binascii.crc32 returns a signed int - normalise it:
            if crc < 0: crc = crc + (1 << 32)
            if crc != crc32:
                print "CRC is %s - expected %s" % (crc, crc32)
                raise IOError("CRC does not match")

        else:
            print "Unable to verify compression_method %s - not implemented, skipping file" % compression_method

        ## Sometimes there is some padding before the next file is
        ## written. We try to account for this if possible by scanning
        ## ahead a little bit. This occurs if the file has a data
        ## descriptor record. We ignore this record because its values
        ## are usually present in the CD anyway.
        total_size = fh.size() + compressed_size

        data = self.r.read(SECTOR_SIZE)
        m = zip_header_re.search(data)
        if m:
            total_size += m.start()

        #print fh
        return total_size
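
Note: the verification loop above is plain streaming raw-deflate decompression with a running CRC-32. A self-contained round trip using the same zlib calls - the sample data is made up:

import zlib, binascii

## Build a raw deflate stream (wbits=-15, no zlib header), which is
## how data is stored inside a zip member:
data = "hello " * 1000
co = zlib.compressobj(9, zlib.DEFLATED, -15)
stream = co.compress(data) + co.flush()

## Verify it the way decode_file() does, in sector sized chunks:
dc = zlib.decompressobj(-15)
crc = 0
total = 0
for i in range(0, len(stream), 512):
    out = dc.decompress(stream[i:i + 512])
    total += len(out)
    crc = binascii.crc32(out, crc)

ex = dc.decompress('Z') + dc.flush()
total += len(ex)
crc = binascii.crc32(ex, crc)

## Normalise the signed result as the carver does:
if crc < 0: crc += 1 << 32

assert total == len(data)
print "Verified %s bytes, crc32 = %08x" % (total, crc)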