def calculateBlockMapHash(self, mapStreamURI, imageStreamURI, storedHashDataType):
    import functools  # sorted() lost the cmp argument in Python 3

    storedBlockHashesHash = self.getStoredBlockHashes(str(imageStreamURI))
    storedBlockHashesHash = sorted(storedBlockHashesHash,
                                   key=functools.cmp_to_key(hashLengthComparator))

    calculatedHash = hashes.new(storedHashDataType)

    for storedHash in storedBlockHashesHash:
        calculatedHash.update(storedHash.digest())

    digest = next(self.resolver.QuerySubjectPredicate(
        mapStreamURI, self.lexicon.mapPointHash)).digest()
    calculatedHash.update(digest)

    digest = next(self.resolver.QuerySubjectPredicate(
        mapStreamURI, self.lexicon.mapIdxHash)).digest()
    # digest = self.calculateMapIdxHash(mapStreamURI).digest()
    calculatedHash.update(digest)

    try:
        # the mapPath hash is optional; not every map stores a mapPath segment
        digest = next(self.resolver.QuerySubjectPredicate(
            mapStreamURI, self.lexicon.mapPathHash)).digest()
        # digest = self.calculateMapPathHash(mapStreamURI).digest()
        calculatedHash.update(digest)
    except StopIteration:
        pass

    return hashes.newImmutableHash(calculatedHash.hexdigest(), storedHashDataType)
def calculateBlockMapHash(self, mapStreamURI, imageStreamURI, storedHashDataType):
    # order the stored block-hash streams deterministically by algorithm
    storedBlockHashesHash = sorted(
        self.getStoredBlockHashes(str(imageStreamURI)),
        key=lambda x: hashOrderingMap[x.blockHashAlgo])

    calculatedHash = hashes.new(storedHashDataType)

    for storedHash in storedBlockHashesHash:
        calculatedHash.update(storedHash.digest())

    for mapHash in self.resolver.QuerySubjectPredicate(
            self.volume_arn, mapStreamURI, self.lexicon.mapPointHash):
        calculatedHash.update(mapHash.digest())

    for mapHash in self.resolver.QuerySubjectPredicate(
            self.volume_arn, mapStreamURI, self.lexicon.mapIdxHash):
        calculatedHash.update(mapHash.digest())

    for mapHash in self.resolver.QuerySubjectPredicate(
            self.volume_arn, mapStreamURI, self.lexicon.mapPathHash):
        calculatedHash.update(mapHash.digest())

    return hashes.newImmutableHash(calculatedHash.hexdigest(), storedHashDataType)
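# A minimal stand-alone sketch of the composition above (not library code): the
# block-map hash is a hash-of-hashes, folding the digests of the stored
# block-hash streams and then the map point/idx/path hashes, in that order, into
# one outer hash. Plain hashlib stands in for the hashes module, and the inputs
# are made up for illustration.
def _blockMapHashSketch():
    import hashlib
    sub_digests = [hashlib.sha512(b"block hashes").digest(),
                   hashlib.sha512(b"map points").digest(),
                   hashlib.sha512(b"map idx").digest()]
    outer = hashlib.sha512()
    for digest in sub_digests:
        outer.update(digest)  # ordering is significant, as in the method above
    return outer.hexdigest()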
def testReadMapOfImage(self):
    fileSize = 629087

    # take the lexicon from our new container
    (version, lex) = Container.identify(self.fileName)

    # setup a resolver
    resolver = data_store.MemoryDataStore(lex)

    # open the two containers within the same resolver (needed so the transitive links work)
    with zip.ZipFile.NewZipFile(
            resolver, version,
            rdfvalue.URN.FromFileName(self.stdLinear)) as targetContainer:
        with zip.ZipFile.NewZipFile(
                resolver, version,
                rdfvalue.URN.FromFileName(self.fileName)) as sourceContainer:

            # open the virtual file and read
            image_urn = sourceContainer.urn.Append("pdf1")
            with resolver.AFF4FactoryOpen(image_urn) as image:
                # check the size is right
                self.assertEqual(fileSize, image.Size())

                # read the header of the virtual file
                image.SeekRead(0, 0)
                self.assertEqual(b"%PDF", image.Read(4))

                # read the whole virtual file and compare with a known hash of it
                image.SeekRead(0, 0)
                buf = image.Read(fileSize)
                h = hashes.new(lexicon.HASH_SHA1)
                h.update(buf)
                self.assertEqual(
                    "5A2FEE16139C7B017B7F1961D842D355A860C7AC".lower(),
                    h.hexdigest())
def calculateBlockHashesHash(self, imageStreamURI):
    storedHashes = self.getStoredBlockHashes(imageStreamURI)

    with self.resolver.AFF4FactoryOpen(imageStreamURI) as imageStream:
        # one running hash-of-block-hashes per stored block-hash algorithm
        calculatedBlockHashes = [hashes.new(h.hashDataType) for h in storedHashes]

        offset = 0
        while offset < imageStream.size:
            imageStream.seek(offset)
            block = imageStream.Read(imageStream.chunk_size)

            for i in range(len(storedHashes)):
                calculatedBlockHashesHash = calculatedBlockHashes[i]
                hashDataType = storedHashes[i].blockHashAlgo

                # verify the block hash
                h = hashes.new(hashDataType)
                h.update(block)
                calculatedBlockHash = h.hexdigest()

                chunkIdx = offset // imageStream.chunk_size
                storedBlockHash = imageStream.readBlockHash(chunkIdx, hashDataType)

                if calculatedBlockHash != storedBlockHash.value:
                    self.listener.onInvalidBlockHash(
                        calculatedBlockHash, storedBlockHash.value,
                        imageStreamURI, offset)
                else:
                    self.listener.onValidBlockHash(calculatedBlockHash)

                calculatedBlockHashesHash.update(h.digest())

            offset = offset + imageStream.chunk_size

        # we now have the block hashes hash calculated
        res = []
        for i in range(len(storedHashes)):
            res.append(BlockHashesHash(storedHashes[i].blockHashAlgo,
                                       calculatedBlockHashes[i].hexdigest(),
                                       storedHashes[i].hashDataType))
        return res
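# Sketch only (hashlib in place of the hashes module): the verification loop
# above hashes each fixed-size chunk, compares it against the stored block hash,
# and folds every chunk digest into a running "block hashes hash". The chunk
# size here is an assumption for illustration.
def _blockHashesHashSketch(data, chunk_size=32 * 1024):
    import hashlib
    running = hashlib.sha512()
    for offset in range(0, len(data), chunk_size):
        block = data[offset:offset + chunk_size]
        block_hash = hashlib.sha512(block)   # per-block hash, checked against storage
        running.update(block_hash.digest())  # fold the block digest into the outer hash
    return running.hexdigest()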
def calculateSegmentHash(self, parentURI, subSegment, hashDataType):
    data = self.readSegment(parentURI, subSegment)
    if data is None:
        raise Exception("Unable to read segment %s" % subSegment)

    calculatedHash = hashes.new(hashDataType)
    calculatedHash.update(data)
    return hashes.newImmutableHash(calculatedHash.hexdigest(), hashDataType)
def calculateMapHash(self, mapURI, storedHashDataType):
    calculatedHash = hashes.new(storedHashDataType)

    calculatedHash.update(self.readSegment(mapURI, "map"))
    calculatedHash.update(self.readSegment(mapURI, "idx"))

    try:
        # the mapPath segment is optional
        calculatedHash.update(self.readSegment(mapURI, "mapPath"))
    except Exception:
        pass

    return hashes.newImmutableHash(calculatedHash.hexdigest(), storedHashDataType)
def doHash(self, mapURI, hashDataType):
    h = hashes.new(hashDataType)
    if self.isMap(mapURI):
        with self.resolver.AFF4FactoryOpen(mapURI) as mapStream:
            remaining = mapStream.Size()
            while remaining > 0:
                toRead = min(32 * 1024, remaining)
                data = mapStream.Read(toRead)
                assert len(data) == toRead
                remaining -= len(data)
                h.update(data)
        return hashes.newImmutableHash(h.hexdigest(), hashDataType)
    raise Exception("IllegalState")
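# Equivalent stand-alone read loop (hashlib assumed): hash a file-like stream in
# 32 KiB reads rather than loading it into memory at once.
def _streamHashSketch(stream, size):
    import hashlib
    h = hashlib.sha512()
    remaining = size
    while remaining > 0:
        data = stream.read(min(32 * 1024, remaining))
        remaining -= len(data)
        h.update(data)
    return h.hexdigest()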
def doValidateContainer(self):
    image = next(self.resolver.QueryPredicateObject(lexicon.AFF4_TYPE,
                                                    self.lexicon.Image))
    datastreams = list(self.resolver.QuerySubjectPredicate(
        image, self.lexicon.dataStream))
    calculatedHashes = {}

    for stream in datastreams:
        if self.isMap(stream):
            for imageStreamURI in self.resolver.QuerySubjectPredicate(
                    stream, self.lexicon.dependentStream):
                parentMap = self.getParentMap(imageStreamURI)
                if parentMap == stream:
                    # only validate the map and stream pair in the same container
                    self.validateBlockHashesHash(imageStreamURI)
                    self.validateMapIdxHash(parentMap)
                    self.validateMapPointHash(parentMap)
                    self.validateMapPathHash(parentMap)
                    self.validateMapHash(parentMap)
                    calculatedHash = self.validateBlockMapHash(
                        parentMap, imageStreamURI)
                    calculatedHashes[parentMap] = calculatedHash

    storedHash = next(self.resolver.QuerySubjectPredicate(
        image, self.lexicon.hash))
    hasha = ""
    hashb = ""
    parentMap = None

    # TODO: handle more cleanly the semantic difference between datatypes
    if len(calculatedHashes) == 1:
        # This is a single part image
        # The single AFF4 hash is just the blockMapHash
        parentMap = list(calculatedHashes.keys())[0]
        calculatedHash = calculatedHashes[parentMap]
        hasha = storedHash
        hashb = calculatedHash
    else:
        # This is a multiple part image
        # The single AFF4 hash is one layer up in the Merkle tree again, with the
        # subordinate nodes being the blockMapHashes for the map stored in each
        # container volume
        # The hash algorithm we use for the single AFF4 hash is the same algorithm
        # we use for all of the Merkle tree inner nodes
        firstCalculatedHash = calculatedHashes[list(calculatedHashes.keys())[0]]
        currentHash = hashes.new(firstCalculatedHash.datatype)

        # We rely on the natural ordering of the map URN's as they are stored in
        # the map to order the blockMapHashes in the Merkle tree.
        for parentMap in list(calculatedHashes.keys()):
            calculatedHash = calculatedHashes[parentMap]
            currentHash.update(calculatedHash.digest())

        hasha = storedHash.value
        hashb = currentHash.hexdigest()

    if hasha != hashb:
        self.listener.onInvalidHash("AFF4Hash", hasha, hashb, parentMap)
    else:
        self.listener.onValidHash("AFF4Hash", hasha, parentMap)
def doValidateContainer(self):
    # FIXME: This should further restrict by container URN since the same data
    # store may be used for multiple containers with many images.
    for image in self.resolver.QueryPredicateObject(
            self.volume_arn, lexicon.AFF4_TYPE, self.lexicon.Image):
        datastreams = list(self.resolver.QuerySubjectPredicate(
            self.volume_arn, image, self.lexicon.dataStream))
        calculated_hashes = collections.OrderedDict()
        hash_datatype = None

        for stream in datastreams:
            if self.isMap(stream):
                for image_stream_uri in self.resolver.QuerySubjectPredicate(
                        self.volume_arn, stream, self.lexicon.dependentStream):
                    parent_map = self.getParentMap(image_stream_uri)
                    if parent_map == stream:
                        # only validate the map and stream pair in the same container
                        self.validateBlockHashesHash(image_stream_uri)
                        self.validateMapIdxHash(parent_map)
                        self.validateMapPointHash(parent_map)
                        self.validateMapPathHash(parent_map)
                        self.validateMapHash(parent_map)
                        calculated_hash = self.validateBlockMapHash(
                            parent_map, image_stream_uri)
                        calculated_hashes[parent_map] = calculated_hash

                        # Assume all block hashes are the same type.
                        if (hash_datatype is not None and
                                hash_datatype != calculated_hash.datatype):
                            raise AttributeError(
                                "Block hashes are not all the same type.")
                        else:
                            hash_datatype = calculated_hash.datatype

        for stored_hash in self.resolver.QuerySubjectPredicate(
                self.volume_arn, image, self.lexicon.hash):
            hasha = ""
            hashb = ""
            parent_map = None

            # TODO: handle more cleanly the semantic difference between datatypes
            if len(calculated_hashes) == 1:
                # This is a single part image.
                # The single AFF4 hash is just the blockMapHash.
                parent_map, calculated_hash = calculated_hashes.popitem()
                hasha = stored_hash
                hashb = calculated_hash
            else:
                # This is a multiple part image. The single AFF4 hash is one
                # layer up in the Merkle tree again, with the subordinate nodes
                # being the blockMapHashes for the map stored in each container
                # volume.
                # The hash algorithm we use for the single AFF4 hash is the same
                # algorithm we use for all of the Merkle tree inner nodes.
                current_hash = hashes.new(hash_datatype)

                # The canonical striped images and Evimetry rely on the natural
                # ordering (string comparison) of the map URN's as they are
                # stored in the map to order the blockMapHashes in the Merkle
                # tree.
                #
                # For example, for a striped image composed of two containers we
                # would have one map per container:
                #   c1 --> aff4://363ac10c-8d8d-4905-ac25-a14aaddd8a41
                #   c2 --> aff4://2dd04819-73c8-40e3-a32b-fdddb0317eac
                # At this level of the Merkle tree we order the concatenated
                # hashes based on the map URI, so we would calculate the hash
                # from c2 then c1.
                # TODO: update the specification to reflect this rule
                for parent_map, calculated_hash in sorted(calculated_hashes.items()):
                    current_hash.update(calculated_hash.digest())

                hasha = stored_hash.value
                hashb = current_hash.hexdigest()

            if hasha != hashb:
                self.listener.onInvalidHash("AFF4Hash", hasha, hashb, parent_map)
            else:
                self.listener.onValidHash("AFF4Hash", hasha, parent_map)
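# Sketch of the ordering rule documented above (hashlib in place of the hashes
# module; URNs taken from the comment, digests made up): the top-level AFF4 hash
# concatenates the per-container blockMapHash digests in the natural string
# order of their map URNs, so the aff4://2dd0... digest is folded in before the
# aff4://363a... one.
def _topLevelHashSketch():
    import hashlib
    calculated = {
        "aff4://363ac10c-8d8d-4905-ac25-a14aaddd8a41": hashlib.sha512(b"c1").digest(),
        "aff4://2dd04819-73c8-40e3-a32b-fdddb0317eac": hashlib.sha512(b"c2").digest(),
    }
    top = hashlib.sha512()
    for urn, digest in sorted(calculated.items()):  # string ordering of map URNs
        top.update(digest)
    return top.hexdigest()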
def writeLogicalStreamHashBased(self, filename, readstream, length, check_bytes=False):
    logical_file_id = None
    if self.isAFF4Collision(filename):
        logical_file_id = rdfvalue.URN("aff4://%s" % uuid.uuid4())
    else:
        logical_file_id = self.urn.Append(
            escaping.arnPathFragment_from_path(filename), quote=False)

    chunk_size = self.block_store_stream.chunk_size

    with aff4_map.AFF4Map.NewAFF4Map(
            self.resolver, logical_file_id, self.urn) as logical_file_map:
        file_offset = 0

        while file_offset < length:
            toread = min(length - file_offset, chunk_size)
            chunk = readstream.read(toread)

            # pad the chunk to chunk_size if it is small
            read_chunk_size = len(chunk)
            if read_chunk_size < chunk_size:
                chunk = chunk + b"\x00" * (chunk_size - read_chunk_size)

            h = hashes.new(lexicon.HASH_SHA512)
            h.update(chunk)

            # we use the URL-safe base64 alphabet of RFC 4648
            hashid = rdfvalue.URN(
                "aff4:sha512:" + base64.urlsafe_b64encode(h.digest()).decode())

            # check if this hash is in the container already
            existing_bytestream_reference_id = self.resolver.GetUnique(
                lexicon.any, hashid, rdfvalue.URN(lexicon.standard.dataStream))

            if existing_bytestream_reference_id is None:
                block_stream_address = self.block_store_stream.TellWrite()
                self.block_store_stream.Write(chunk)

                chunk_reference_id = rdfvalue.URN(
                    self.block_store_stream.urn.SerializeToString() +
                    "[0x%x:0x%x]" % (block_stream_address, chunk_size))

                self.resolver.Add(self.urn, hashid,
                                  rdfvalue.URN(lexicon.standard.dataStream),
                                  chunk_reference_id)
                logical_file_map.AddRange(file_offset, 0, toread, hashid)
                #print("[%x, %x] -> %s -> %s" % (file_offset, toread, hashid, chunk_reference_id))
            else:
                if check_bytes:
                    with self.resolver.AFF4FactoryOpen(
                            existing_bytestream_reference_id) as existing_chunk_stream:
                        existing_chunk_length = existing_chunk_stream.length
                        existing_chunk = existing_chunk_stream.Read(existing_chunk_length)

                        if chunk != existing_chunk:
                            # we hit the jackpot and found a hash collision
                            # in this highly unlikely event, we store the new bytes
                            # using regular logical imaging. To record the collision,
                            # we add the colliding stream as a property
                            print("!!!Collision found for hash %s" % hashid)
                            block_stream_address = self.block_store_stream.TellWrite()
                            self.block_store_stream.Write(chunk)
                            chunk_reference_id = rdfvalue.URN(
                                self.block_store_stream.urn.SerializeToString() +
                                "[0x%x:0x%x]" % (block_stream_address, chunk_size))
                            logical_file_map.AddRange(
                                file_offset, block_stream_address,
                                chunk_size, self.block_store_stream.urn)
                            self.resolver.Add(
                                self.urn, hashid,
                                rdfvalue.URN(lexicon.standard11.collidingDataStream),
                                chunk_reference_id)
                        else:
                            logical_file_map.AddRange(file_offset, 0, toread, hashid)
                else:
                    logical_file_map.AddRange(file_offset, 0, toread, hashid)
                    #print("[%x, %x] -> %s -> %s" % (file_offset, toread, hashid, existing_bytestream_reference_id))

            file_offset += toread

        logical_file_map.Close()

    self.resolver.Add(self.urn, logical_file_id, rdfvalue.URN(lexicon.AFF4_TYPE),
                      rdfvalue.URN(lexicon.standard11.FileImage))
    self.resolver.Add(self.urn, logical_file_id, rdfvalue.URN(lexicon.AFF4_TYPE),
                      rdfvalue.URN(lexicon.standard.Image))
    self.resolver.Add(self.urn, logical_file_id,
                      rdfvalue.URN(lexicon.standard11.pathName),
                      rdfvalue.XSDString(filename))
    return logical_file_id
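# Sketch (hashlib/base64 only) of how the de-duplicating writer above derives a
# content-addressed chunk identifier: SHA-512 over the zero-padded chunk, then
# the URL-safe base64 alphabet of RFC 4648, prefixed with "aff4:sha512:". The
# chunk size is an assumption for illustration.
def _chunkHashIdSketch(chunk, chunk_size=32 * 1024):
    import base64
    import hashlib
    if len(chunk) < chunk_size:
        chunk = chunk + b"\x00" * (chunk_size - len(chunk))  # pad the tail chunk
    digest = hashlib.sha512(chunk).digest()
    return "aff4:sha512:" + base64.urlsafe_b64encode(digest).decode()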
def writeLogicalStreamRabinHashBased(self, filename, readstream, length, check_bytes=False):
    logical_file_id = None
    if self.isAFF4Collision(filename):
        logical_file_id = rdfvalue.URN("aff4://%s" % uuid.uuid4())
    else:
        logical_file_id = self.urn.Append(
            escaping.arnPathFragment_from_path(filename), quote=False)

    chunk_size = 32 * 1024
    cdc = fastchunking.RabinKarpCDC(window_size=48, seed=0)
    chunker = cdc.create_chunker(chunk_size=4096)

    with aff4_map.AFF4Map.NewAFF4Map(
            self.resolver, logical_file_id, self.urn) as logical_file_map:
        file_offset = 0
        lastbuffer = None
        lastoffset = 0
        chunk_offset = 0

        while file_offset < length:
            toread = min(length - file_offset, chunk_size)
            buffer = readstream.read(toread)

            foundBoundaries = False
            for boundary in chunker.next_chunk_boundaries(buffer):
                foundBoundaries = True

                if lastbuffer is not None:
                    # the chunk spans buffers: stitch the carried-over tail onto
                    # the front of this buffer
                    chunk = lastbuffer[lastoffset:]
                    chunk_offset = file_offset - len(chunk)
                    chunk = chunk + buffer[:boundary]
                    lastbuffer = None
                else:
                    chunk = buffer[lastoffset:boundary]
                    chunk_offset = file_offset + lastoffset

                h = hashes.new(lexicon.HASH_SHA512)
                h.update(chunk)
                self.preserveChunk(logical_file_map, chunk, chunk_offset, h, check_bytes)
                lastoffset = boundary

            if not foundBoundaries:
                if lastbuffer is not None:
                    lastbuffer = lastbuffer + buffer
                else:
                    lastbuffer = buffer
            else:
                lastbuffer = buffer

            file_offset += toread

        # preserve any trailing bytes after the last boundary
        if lastbuffer is not None and lastoffset < len(lastbuffer):
            chunk = lastbuffer[lastoffset:]
            chunk_offset = file_offset - len(chunk)
            h = hashes.new(lexicon.HASH_SHA512)
            h.update(chunk)
            self.preserveChunk(logical_file_map, chunk, chunk_offset, h, check_bytes)

        logical_file_map.Close()

    self.resolver.Add(self.urn, logical_file_id, rdfvalue.URN(lexicon.AFF4_TYPE),
                      rdfvalue.URN(lexicon.standard11.FileImage))
    self.resolver.Add(self.urn, logical_file_id, rdfvalue.URN(lexicon.AFF4_TYPE),
                      rdfvalue.URN(lexicon.standard.Image))
    self.resolver.Add(self.urn, logical_file_id,
                      rdfvalue.URN(lexicon.standard11.pathName),
                      rdfvalue.XSDString(filename))
    return logical_file_id
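# Sketch of the content-defined chunking used above, with the same fastchunking
# calls the writer relies on. next_chunk_boundaries() yields positions inside
# the current buffer only, so bytes between the last boundary and the end of a
# buffer must be carried into the next read -- the role of the
# lastbuffer/lastoffset bookkeeping in the method above.
def _rabinChunkSketch(readstream):
    import fastchunking
    cdc = fastchunking.RabinKarpCDC(window_size=48, seed=0)
    chunker = cdc.create_chunker(chunk_size=4096)  # ~4 KiB average chunk size
    pending = b""
    while True:
        buf = readstream.read(32 * 1024)
        if not buf:
            break
        last = 0
        for boundary in chunker.next_chunk_boundaries(buf):
            yield pending + buf[last:boundary]  # a complete content-defined chunk
            pending = b""
            last = boundary
        pending = pending + buf[last:]
    if pending:
        yield pending  # trailing bytes after the final boundary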
def doValidateContainer(self):
    # FIXME: This should further restrict by container URN since the same data
    # store may be used for multiple containers with many images.
    for image in self.resolver.QueryPredicateObject(lexicon.AFF4_TYPE,
                                                    self.lexicon.Image):
        datastreams = list(self.resolver.QuerySubjectPredicate(
            image, self.lexicon.dataStream))
        calculated_hashes = collections.OrderedDict()
        hash_datatype = None

        for stream in datastreams:
            if self.isMap(stream):
                for image_stream_uri in self.resolver.QuerySubjectPredicate(
                        stream, self.lexicon.dependentStream):
                    parent_map = self.getParentMap(image_stream_uri)
                    if parent_map == stream:
                        # only validate the map and stream pair in the same container
                        self.validateBlockHashesHash(image_stream_uri)
                        self.validateMapIdxHash(parent_map)
                        self.validateMapPointHash(parent_map)
                        self.validateMapPathHash(parent_map)
                        self.validateMapHash(parent_map)
                        calculated_hash = self.validateBlockMapHash(
                            parent_map, image_stream_uri)
                        calculated_hashes[parent_map] = calculated_hash

                        # Assume all block hashes are the same type.
                        if (hash_datatype is not None and
                                hash_datatype != calculated_hash.datatype):
                            raise AttributeError(
                                "Block hashes are not all the same type.")
                        else:
                            hash_datatype = calculated_hash.datatype

        for stored_hash in self.resolver.QuerySubjectPredicate(
                image, self.lexicon.hash):
            hasha = ""
            hashb = ""
            parent_map = None

            # TODO: handle more cleanly the semantic difference between datatypes
            if len(calculated_hashes) == 1:
                # This is a single part image.
                # The single AFF4 hash is just the blockMapHash.
                parent_map, calculated_hash = calculated_hashes.popitem()
                hasha = stored_hash
                hashb = calculated_hash
            else:
                # This is a multiple part image. The single AFF4 hash is one
                # layer up in the Merkle tree again, with the subordinate nodes
                # being the blockMapHashes for the map stored in each container
                # volume.
                # The hash algorithm we use for the single AFF4 hash is the same
                # algorithm we use for all of the Merkle tree inner nodes.
                current_hash = hashes.new(hash_datatype)

                # FIXME: This is a flaw in the scheme since there is no
                # reasonable order specified. We temporarily sort the results to
                # get the test to pass but this needs to be properly addressed.
                # We rely on the natural ordering of the map URN's as they are
                # stored in the map to order the blockMapHashes in the Merkle tree.
                for parent_map, calculated_hash in sorted(calculated_hashes.items()):
                    current_hash.update(calculated_hash.digest())

                hasha = stored_hash.value
                hashb = current_hash.hexdigest()

            if hasha != hashb:
                self.listener.onInvalidHash("AFF4Hash", hasha, hashb, parent_map)
            else:
                self.listener.onValidHash("AFF4Hash", hasha, parent_map)