def build_maps(self, index_file):
    """Build and save one reassembly map per Zip archive found in the image.

    The index loaded from index_file is expected to provide lists of image
    offsets under the keys 'EndCentralDirectory', 'CDFileHeader' and
    'ZipFileHeader'. Every EndCentralDirectory hit is treated as a
    separate Zip file: its Central Directory is located, each Central
    Directory entry is matched against candidate local file headers, and
    the resulting (archive offset -> image offset) points are saved to
    "<ecd_offset>.map".

    The image itself is read from the path in self.args[0].

    An EndCentralDirectory for which no Central Directory candidate can be
    found is reported and skipped.
    """
    hits = self.load_index(index_file)

    ## The image is raw binary data, so it is opened in binary mode. The
    ## handle is closed in the finally clause even if parsing fails.
    image_fd = open(self.args[0], 'rb')
    try:
        for ecd_offset in hits['EndCentralDirectory']:
            ## Each EndCentralDirectory represents a new Zip file
            r = Carver.Reassembler(None)
            ecd = Zip.EndCentralDirectory(Buffer(image_fd)[ecd_offset:])
            print("End Central Directory at offset %s:" % (ecd_offset, ))

            ## Find the CD:
            offset_of_cd = ecd['offset_of_cd'].get_value()
            size_of_cd = ecd['size_of_cd'].get_value()

            ## Check if the cd is where we think it should be:
            possibles = []
            for x in hits['CDFileHeader']:
                if x == ecd_offset - size_of_cd:
                    ## No fragmentation in CD:
                    print("No fragmentation in Central Directory at offset %s discovered... good!" % x)
                    possibles = [x]
                    break

                ## Modulo rule: a candidate must sit at the same position
                ## within a 512 byte block as the offset recorded in the
                ## archive (presumably the sector size - TODO confirm).
                if x % 512 == offset_of_cd % 512:
                    print("Possible Central Directory Starts at %s" % x)
                    possibles.append(x)

            ## FIXME: this needs to be made to estimate the most similar
            ## possibility - we really have very little to go on here -
            ## how can we distinguish between two different CDs that occur
            ## in the same spot? I dont think its very likely in reality
            ## because the CD will be at the end of the zip file which
            ## will be of varying sizes.

            ## We probably should prefer the CD found at image offset
            ## of ecd - ecd['size_of_cd'] which will be the case if
            ## the CD is not fragmented.

            ## Without any candidate there is nothing to anchor this
            ## archive to, so skip it instead of failing on possibles[0].
            if not possibles:
                print("No Central Directory candidate found for End Central Directory at offset %s... skipping" % (ecd_offset, ))
                continue

            ## For now we go with the first possibility:
            cd_image_offset = possibles[0]

            ## Identify the central directory:
            r.add_point(offset_of_cd, cd_image_offset, "Central_Directory")

            ## We can calculate the offset of ecd here:
            r.add_point(offset_of_cd + size_of_cd, ecd_offset,
                        "End_Central_Directory")

            ## The file end - this is used to stop the carver:
            r.add_point(offset_of_cd + size_of_cd + ecd.size(),
                        ecd_offset + ecd.size(), "EOF")

            ## Read all entries in the CD and try to locate their
            ## corresponding ZipFileHeaders:
            for _ in range(ecd['total_entries_in_cd_on_disk'].get_value()):
                cd = Zip.CDFileHeader(Buffer(image_fd)[cd_image_offset:])

                ## Now try to find the ZipFileHeader for this cd entry:
                fh_offset = cd['relative_offset_local_header'].get_value()

                ## NOTE(review): the search does not stop at the first
                ## acceptable header, so every header passing the checks
                ## below is recorded as a point for this entry.
                for fh_image_offset in hits['ZipFileHeader']:
                    ## Apply the modulo rule:
                    if fh_image_offset % 512 != fh_offset % 512:
                        continue

                    print("Possible File header at image offset %s" % fh_image_offset)

                    try:
                        fh = Zip.ZipFileHeader(
                            Buffer(image_fd)[fh_image_offset:])
                    except Exception:
                        ## Parsing failures only rule out this candidate;
                        ## KeyboardInterrupt/SystemExit still propagate.
                        print("Oops - no File Header here... continuing")
                        continue

                    ## Is it the file we expect?
                    path = fh['zip_path'].get_value()
                    expected_path = cd['filename'].get_value()

                    ## Check the paths:
                    if path and expected_path and path != expected_path:
                        print("This ZipFileHeader is for %s, while we wanted %s" % (
                            path, expected_path))
                        continue

                    ## Check the expected lengths with the central
                    ## directory. A zero/empty value on either side is
                    ## not compared.
                    cd_compr_size = cd['compressed_size'].get_value()
                    cd_uncompr_size = cd['uncompr_size'].get_value()
                    fh_compr_size = fh['compr_size'].get_value()
                    fh_uncompr_size = fh['uncompr_size'].get_value()

                    ## The central directory holds the expected value, so
                    ## the header's value is reported first.
                    if (cd_compr_size and fh_compr_size
                            and cd_compr_size != fh_compr_size):
                        print("Compressed size does not match (%s - expected %s)" % (
                            fh_compr_size, cd_compr_size))
                        continue

                    if (cd_uncompr_size and fh_uncompr_size
                            and cd_uncompr_size != fh_uncompr_size):
                        print("Uncompressed size does not match (%s - expected %s)" % (
                            fh_uncompr_size, cd_uncompr_size))
                        continue

                    print("Will use Zip File Header at %s." % (fh_image_offset))

                    ## Identify point:
                    r.add_point(fh_offset, fh_image_offset, "File_%s" % path)

                ## Progress to the next file in the archive:
                cd_image_offset += cd.size()

            r.save_map("%s.map" % ecd_offset)
    finally:
        image_fd.close()
def decode_ecd_header(self, b, length_to_test):
    """Parse an End Central Directory record from b and report it.

    The parsed record is printed and its size, as given by the record's
    size() method, is returned. length_to_test is accepted but not used
    here.
    """
    end_record = Zip.EndCentralDirectory(b)
    message = "Found ECD %s" % end_record
    print(message)
    return end_record.size()